run_agent.py

#!/usr/bin/env python3
"""
AI Agent Runner with Tool Calling

This module provides a clean, standalone agent that can execute AI models
with tool calling capabilities. It handles the conversation loop, tool execution,
and response management.

Features:
- Automatic tool calling loop until completion
- Configurable model parameters
- Error handling and recovery
- Message history management
- Support for multiple model providers

Usage:
    from run_agent import AIAgent
    
    agent = AIAgent(base_url="http://localhost:30000/v1", model="claude-opus-4-20250514")
    response = agent.run_conversation("Tell me about the latest Python updates")
"""

import copy
import hashlib
import json
import logging
logger = logging.getLogger(__name__)
import os
import random
import re
import sys
import time
import threading
from types import SimpleNamespace
import uuid
from typing import List, Dict, Any, Optional
from openai import OpenAI
import fire
from datetime import datetime
from pathlib import Path

# Load .env from ~/.hermes/.env first, then project root as dev fallback
from dotenv import load_dotenv

# Resolve the Hermes home directory (HERMES_HOME overrides ~/.hermes) and the
# two candidate .env locations, in priority order.
_hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
_user_env = _hermes_home / ".env"
_project_env = Path(__file__).parent / '.env'

# Only the first candidate that exists is loaded: the project-root file is a
# fallback, it is never merged on top of the user-level file.
for _env_candidate in (_user_env, _project_env):
    if not _env_candidate.exists():
        continue
    try:
        load_dotenv(dotenv_path=_env_candidate, encoding="utf-8")
    except UnicodeDecodeError:
        # Not valid UTF-8 -- retry the same file as latin-1.
        load_dotenv(dotenv_path=_env_candidate, encoding="latin-1")
    logger.info("Loaded environment variables from %s", _env_candidate)
    break
else:
    # Neither candidate exists.
    logger.info("No .env file found. Using system environment variables.")

# Point mini-swe-agent at the Hermes home directory so it shares our config.
# setdefault() leaves any value already present in the environment untouched.
os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(_hermes_home))
os.environ.setdefault("MSWEA_SILENT_STARTUP", "1")

# Import our tool system
from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements
from tools.terminal_tool import cleanup_vm
from tools.interrupt import set_interrupt as _set_interrupt
from tools.browser_tool import cleanup_browser

import requests

from hermes_constants import OPENROUTER_BASE_URL, OPENROUTER_MODELS_URL

# Agent internals extracted to agent/ package for modularity
from agent.prompt_builder import (
    DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
    MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
)
from agent.model_metadata import (
    fetch_model_metadata, get_model_context_length,
    estimate_tokens_rough, estimate_messages_tokens_rough,
    get_next_probe_tier, parse_context_limit_from_error,
    save_context_length,
)
from agent.context_compressor import ContextCompressor
from agent.prompt_caching import apply_anthropic_cache_control
from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt
from agent.display import (
    KawaiiSpinner, build_tool_preview as _build_tool_preview,
    get_cute_tool_message as _get_cute_tool_message_impl,
    _detect_tool_failure,
)
from agent.trajectory import (
    convert_scratchpad_to_think, has_incomplete_scratchpad,
    save_trajectory as _save_trajectory_to_file,
)


class IterationBudget:
    """Thread-safe shared iteration counter for parent and child agents.

    Tracks total LLM-call iterations consumed across a parent agent and all
    its subagents.  A single ``IterationBudget`` is created by the parent
    and passed to every child so they share the same cap.

    ``execute_code`` (programmatic tool calling) iterations are refunded via
    :meth:`refund` so they don't eat into the budget.

    Every read and write of the internal counter happens while holding the
    same lock, so :attr:`used` and :attr:`remaining` always observe a value
    that is consistent with concurrent :meth:`consume` / :meth:`refund` calls.
    """

    def __init__(self, max_total: int):
        """Create a budget allowing at most ``max_total`` iterations."""
        self.max_total = max_total
        self._used = 0
        self._lock = threading.Lock()

    def consume(self) -> bool:
        """Try to consume one iteration.

        Returns:
            True if an iteration was available and has been counted, False if
            the budget is already exhausted (the counter is left unchanged).
        """
        with self._lock:
            if self._used >= self.max_total:
                return False
            self._used += 1
            return True

    def refund(self) -> None:
        """Give back one iteration (e.g. for execute_code turns).

        The counter never drops below zero; refunding an unused budget is a
        no-op.
        """
        with self._lock:
            if self._used > 0:
                self._used -= 1

    @property
    def used(self) -> int:
        """Number of iterations consumed so far (net of refunds)."""
        # Read under the lock like every other accessor of ``_used``.
        with self._lock:
            return self._used

    @property
    def remaining(self) -> int:
        """Number of iterations still available, never negative."""
        with self._lock:
            return max(0, self.max_total - self._used)


class AIAgent:
    """
    AI Agent with tool calling capabilities.
    
    This class manages the conversation flow, tool execution, and response handling
    for AI models that support function calling.
    """
    
    def __init__(
        self,
        base_url: str = None,
        api_key: str = None,
        provider: str = None,
        api_mode: str = None,
        model: str = "anthropic/claude-opus-4.6",  # OpenRouter format
        max_iterations: int = 90,  # Default tool-calling iterations (shared with subagents)
        tool_delay: float = 1.0,
        enabled_toolsets: List[str] = None,
        disabled_toolsets: List[str] = None,
        save_trajectories: bool = False,
        verbose_logging: bool = False,
        quiet_mode: bool = False,
        ephemeral_system_prompt: str = None,
        log_prefix_chars: int = 100,
        log_prefix: str = "",
        providers_allowed: List[str] = None,
        providers_ignored: List[str] = None,
        providers_order: List[str] = None,
        provider_sort: str = None,
        provider_require_parameters: bool = False,
        provider_data_collection: str = None,
        session_id: str = None,
        tool_progress_callback: callable = None,
        thinking_callback: callable = None,
        reasoning_callback: callable = None,
        clarify_callback: callable = None,
        step_callback: callable = None,
        max_tokens: int = None,
        reasoning_config: Dict[str, Any] = None,
        prefill_messages: List[Dict[str, Any]] = None,
        platform: str = None,
        skip_context_files: bool = False,
        skip_memory: bool = False,
        session_db=None,
        honcho_session_key: str = None,
        iteration_budget: "IterationBudget" = None,
        fallback_model: Dict[str, Any] = None,
        checkpoints_enabled: bool = False,
        checkpoint_max_snapshots: int = 50,
    ):
        """
        Initialize the AI Agent.

        Besides storing configuration, construction has side effects: it
        attaches a rotating error-log handler to the root logger (once per
        process), adjusts logger levels, builds the OpenAI client, loads tool
        definitions, creates the session log directory, and optionally
        initializes the session DB, persistent memory and Honcho.

        Args:
            base_url (str): Base URL for the model API (optional; defaults to OpenRouter)
            api_key (str): API key for authentication (optional, falls back to OPENROUTER_API_KEY)
            provider (str): Provider identifier (optional; used for telemetry/routing hints).
                Normalized to lowercase; defaults to "openrouter".
            api_mode (str): API mode override: "chat_completions" or "codex_responses".
                Any other value is ignored and the mode is derived from provider/base_url.
            model (str): Model name to use (default: "anthropic/claude-opus-4.6")
            max_iterations (int): Maximum number of tool calling iterations (default: 90)
            tool_delay (float): Delay between tool calls in seconds (default: 1.0)
            enabled_toolsets (List[str]): Only enable tools from these toolsets (optional)
            disabled_toolsets (List[str]): Disable tools from these toolsets (optional)
            save_trajectories (bool): Whether to save conversation trajectories to JSONL files (default: False)
            verbose_logging (bool): Enable verbose logging for debugging (default: False)
            quiet_mode (bool): Suppress progress output for clean CLI experience (default: False)
            ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
            log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses (default: 100)
            log_prefix (str): Prefix to add to all log messages for identification in parallel processing (default: "")
            providers_allowed (List[str]): OpenRouter providers to allow (optional)
            providers_ignored (List[str]): OpenRouter providers to ignore (optional)
            providers_order (List[str]): OpenRouter providers to try in order (optional)
            provider_sort (str): Sort providers by price/throughput/latency (optional)
            provider_require_parameters (bool): Stored with the OpenRouter provider preferences (default: False)
            provider_data_collection (str): Stored with the OpenRouter provider preferences (optional)
            session_id (str): Pre-generated session ID for logging (optional, auto-generated if not provided)
            tool_progress_callback (callable): Callback function(tool_name, args_preview) for progress notifications
            thinking_callback (callable): Optional callback, stored as-is (signature not defined here)
            reasoning_callback (callable): Optional callback, stored as-is (signature not defined here)
            clarify_callback (callable): Callback function(question, choices) -> str for interactive user questions.
                Provided by the platform layer (CLI or gateway). If None, the clarify tool returns an error.
            step_callback (callable): Optional callback, stored as-is (signature not defined here)
            max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
            reasoning_config (Dict): OpenRouter reasoning configuration override (e.g. {"effort": "none"} to disable thinking).
                If None, defaults to {"enabled": True, "effort": "medium"} for OpenRouter. Set to disable/customize reasoning.
            prefill_messages (List[Dict]): Messages to prepend to conversation history as prefilled context.
                Useful for injecting a few-shot example or priming the model's response style.
                Example: [{"role": "user", "content": "Hi!"}, {"role": "assistant", "content": "Hello!"}]
            platform (str): The interface platform the user is on (e.g. "cli", "telegram", "discord", "whatsapp").
                Used to inject platform-specific formatting hints into the system prompt.
            skip_context_files (bool): If True, skip auto-injection of SOUL.md, AGENTS.md, and .cursorrules
                into the system prompt. Use this for batch processing and data generation to avoid
                polluting trajectories with user-specific persona or project instructions.
            skip_memory (bool): If True, skip loading persistent memory and skip Honcho initialization.
            session_db: Optional session store; when provided, ``create_session`` is called on it
                (failures are logged at DEBUG and ignored).
            honcho_session_key (str): Session key for Honcho integration (e.g., "telegram:123456" or CLI session_id).
                When provided and Honcho is enabled in config, enables persistent cross-session user modeling.
            iteration_budget (IterationBudget): Shared budget inherited from a parent agent. When omitted,
                a new ``IterationBudget(max_iterations)`` is created.
            fallback_model (Dict): Backup model config, e.g. {"provider": "openrouter", "model": "..."}.
                Non-dict values are ignored.
            checkpoints_enabled (bool): Passed to the filesystem CheckpointManager as ``enabled``.
            checkpoint_max_snapshots (int): Passed to the filesystem CheckpointManager as ``max_snapshots``.

        Raises:
            RuntimeError: If the OpenAI client cannot be constructed (the original
                exception is chained as ``__cause__``).
        """
        self.model = model
        self.max_iterations = max_iterations
        # Shared iteration budget — parent creates, children inherit.
        # Consumed by every LLM turn across parent + all subagents.
        self.iteration_budget = iteration_budget or IterationBudget(max_iterations)
        self.tool_delay = tool_delay
        self.save_trajectories = save_trajectories
        self.verbose_logging = verbose_logging
        self.quiet_mode = quiet_mode
        self.ephemeral_system_prompt = ephemeral_system_prompt
        self.platform = platform  # "cli", "telegram", "discord", "whatsapp", etc.
        self.skip_context_files = skip_context_files
        self.log_prefix_chars = log_prefix_chars
        self.log_prefix = f"{log_prefix} " if log_prefix else ""
        # Store effective base URL for feature detection (prompt caching, reasoning, etc.)
        # When no base_url is provided, the client defaults to OpenRouter, so reflect that here.
        self.base_url = base_url or OPENROUTER_BASE_URL
        provider_name = provider.strip().lower() if isinstance(provider, str) and provider.strip() else None
        self.provider = provider_name or "openrouter"
        # API mode resolution: explicit valid override > provider name > base URL sniffing.
        if api_mode in {"chat_completions", "codex_responses"}:
            self.api_mode = api_mode
        elif self.provider == "openai-codex":
            self.api_mode = "codex_responses"
        elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self.base_url.lower():
            self.api_mode = "codex_responses"
            self.provider = "openai-codex"
        else:
            self.api_mode = "chat_completions"

        self.tool_progress_callback = tool_progress_callback
        self.thinking_callback = thinking_callback
        self.reasoning_callback = reasoning_callback
        self.clarify_callback = clarify_callback
        self.step_callback = step_callback
        self._last_reported_tool = None  # Track for "new tool" mode

        # Interrupt mechanism for breaking out of tool loops
        self._interrupt_requested = False
        self._interrupt_message = None  # Optional message that triggered interrupt

        # Subagent delegation state
        self._delegate_depth = 0        # 0 = top-level agent, incremented for children
        self._active_children = []      # Running child AIAgents (for interrupt propagation)

        # Store OpenRouter provider preferences
        self.providers_allowed = providers_allowed
        self.providers_ignored = providers_ignored
        self.providers_order = providers_order
        self.provider_sort = provider_sort
        self.provider_require_parameters = provider_require_parameters
        self.provider_data_collection = provider_data_collection

        # Store toolset filtering options
        self.enabled_toolsets = enabled_toolsets
        self.disabled_toolsets = disabled_toolsets

        # Model response configuration
        self.max_tokens = max_tokens  # None = use model default
        self.reasoning_config = reasoning_config  # None = use default (medium for OpenRouter)
        self.prefill_messages = prefill_messages or []  # Prefilled conversation turns

        # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter.
        # Reduces input costs by ~75% on multi-turn conversations by caching the
        # conversation prefix. Uses system_and_3 strategy (4 breakpoints).
        is_openrouter = "openrouter" in self.base_url.lower()
        is_claude = "claude" in self.model.lower()
        self._use_prompt_caching = is_openrouter and is_claude
        self._cache_ttl = "5m"  # Default 5-minute TTL (1.25x write cost)

        # Iteration budget pressure: warn the LLM as it approaches max_iterations.
        # Warnings are injected into the last tool result JSON (not as separate
        # messages) so they don't break message structure or invalidate caching.
        self._budget_caution_threshold = 0.7   # 70% — nudge to start wrapping up
        self._budget_warning_threshold = 0.9   # 90% — urgent, respond now
        self._budget_pressure_enabled = True

        # Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log
        # so tool failures, API errors, etc. are inspectable after the fact.
        # NOTE(review): this path ignores HERMES_HOME, unlike the session log
        # directory further down -- confirm that is intended.
        from agent.redact import RedactingFormatter
        from logging.handlers import RotatingFileHandler
        _error_log_dir = Path.home() / ".hermes" / "logs"
        _error_log_dir.mkdir(parents=True, exist_ok=True)
        _error_log_path = _error_log_dir / "errors.log"
        _root_logger = logging.getLogger()
        # The root logger is process-global while agents (parents and their
        # subagents) are created many times. Attach the file handler only if
        # one for this file is not already present; otherwise every new agent
        # would duplicate each log line and hold another open file handle.
        _error_log_target = os.path.abspath(os.fspath(_error_log_path))
        _error_handler_attached = any(
            isinstance(existing, RotatingFileHandler)
            and getattr(existing, "baseFilename", None) == _error_log_target
            for existing in _root_logger.handlers
        )
        if not _error_handler_attached:
            _error_file_handler = RotatingFileHandler(
                _error_log_path, maxBytes=2 * 1024 * 1024, backupCount=2,
            )
            _error_file_handler.setLevel(logging.WARNING)
            _error_file_handler.setFormatter(RedactingFormatter(
                '%(asctime)s %(levelname)s %(name)s: %(message)s',
            ))
            _root_logger.addHandler(_error_file_handler)

        # NOTE(review): logging.basicConfig() does nothing when the root logger
        # already has handlers, and the error-log handler is attached above
        # before these calls -- confirm the level/format requested below are
        # expected to take effect.
        if self.verbose_logging:
            logging.basicConfig(
                level=logging.DEBUG,
                format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                datefmt='%H:%M:%S'
            )
            # Route every root handler through the redacting formatter.
            for handler in logging.getLogger().handlers:
                handler.setFormatter(RedactingFormatter(
                    '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    datefmt='%H:%M:%S',
                ))
            # Keep third-party libraries at WARNING level to reduce noise
            # We have our own retry and error logging that's more informative
            logging.getLogger('openai').setLevel(logging.WARNING)
            logging.getLogger('openai._base_client').setLevel(logging.WARNING)
            logging.getLogger('httpx').setLevel(logging.WARNING)
            logging.getLogger('httpcore').setLevel(logging.WARNING)
            logging.getLogger('asyncio').setLevel(logging.WARNING)
            # Suppress Modal/gRPC related debug spam
            logging.getLogger('hpack').setLevel(logging.WARNING)
            logging.getLogger('hpack.hpack').setLevel(logging.WARNING)
            logging.getLogger('grpc').setLevel(logging.WARNING)
            logging.getLogger('modal').setLevel(logging.WARNING)
            logging.getLogger('rex-deploy').setLevel(logging.INFO)  # Keep INFO for sandbox status
            logger.info("Verbose logging enabled (third-party library logs suppressed)")
        else:
            # Set logging to INFO level for important messages only
            logging.basicConfig(
                level=logging.INFO,
                format='%(asctime)s - %(levelname)s - %(message)s',
                datefmt='%H:%M:%S'
            )
            # Suppress noisy library logging
            logging.getLogger('openai').setLevel(logging.ERROR)
            logging.getLogger('openai._base_client').setLevel(logging.ERROR)
            logging.getLogger('httpx').setLevel(logging.ERROR)
            logging.getLogger('httpcore').setLevel(logging.ERROR)
            if self.quiet_mode:
                # In quiet mode (CLI default), suppress all tool/infra log
                # noise. The TUI has its own rich display for status; logger
                # INFO/WARNING messages just clutter it.
                for quiet_logger in [
                    'tools',               # all tools.* (terminal, browser, web, file, etc.)
                    'minisweagent',         # mini-swe-agent execution backend
                    'run_agent',            # agent runner internals
                    'trajectory_compressor',
                    'cron',                 # scheduler (only relevant in daemon mode)
                    'hermes_cli',           # CLI helpers
                ]:
                    logging.getLogger(quiet_logger).setLevel(logging.ERROR)

        # Initialize OpenAI client - defaults to OpenRouter
        client_kwargs = {}

        # Default to OpenRouter if no base_url provided
        if base_url:
            client_kwargs["base_url"] = base_url
        else:
            client_kwargs["base_url"] = OPENROUTER_BASE_URL

        # Handle API key - OpenRouter is the primary provider
        if api_key:
            client_kwargs["api_key"] = api_key
        else:
            # Only OPENROUTER_API_KEY is consulted here; empty string when unset.
            client_kwargs["api_key"] = os.getenv("OPENROUTER_API_KEY", "")

        # OpenRouter app attribution — shows hermes-agent in rankings/analytics
        effective_base = client_kwargs.get("base_url", "")
        if "openrouter" in effective_base.lower():
            client_kwargs["default_headers"] = {
                "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
                "X-OpenRouter-Title": "Hermes Agent",
                "X-OpenRouter-Categories": "productivity,cli-agent",
            }
        elif "api.kimi.com" in effective_base.lower():
            # Kimi Code API requires a recognized coding-agent User-Agent
            # (see https://github.com/MoonshotAI/kimi-cli)
            client_kwargs["default_headers"] = {
                "User-Agent": "KimiCLI/1.0",
            }

        self._client_kwargs = client_kwargs  # stored for rebuilding after interrupt
        try:
            self.client = OpenAI(**client_kwargs)
            if not self.quiet_mode:
                print(f"🤖 AI Agent initialized with model: {self.model}")
                if base_url:
                    print(f"🔗 Using custom base URL: {base_url}")
                # Always show API key info (masked) for debugging auth issues
                key_used = client_kwargs.get("api_key", "none")
                if key_used and key_used != "dummy-key" and len(key_used) > 12:
                    print(f"🔑 Using API key: {key_used[:8]}...{key_used[-4:]}")
                else:
                    print(f"⚠️  Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')")
        except Exception as e:
            # Chain the cause so the underlying failure stays in the traceback.
            raise RuntimeError(f"Failed to initialize OpenAI client: {e}") from e

        # Provider fallback — a single backup model/provider tried when the
        # primary is exhausted (rate-limit, overload, connection failure).
        # Config shape: {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}
        self._fallback_model = fallback_model if isinstance(fallback_model, dict) else None
        self._fallback_activated = False
        if self._fallback_model:
            fb_p = self._fallback_model.get("provider", "")
            fb_m = self._fallback_model.get("model", "")
            if fb_p and fb_m and not self.quiet_mode:
                print(f"🔄 Fallback model: {fb_m} ({fb_p})")

        # Get available tools with filtering
        self.tools = get_tool_definitions(
            enabled_toolsets=enabled_toolsets,
            disabled_toolsets=disabled_toolsets,
            quiet_mode=self.quiet_mode,
        )

        # Show tool configuration and store valid tool names for validation
        self.valid_tool_names = set()
        if self.tools:
            self.valid_tool_names = {tool["function"]["name"] for tool in self.tools}
            tool_names = sorted(self.valid_tool_names)
            if not self.quiet_mode:
                print(f"🛠️  Loaded {len(self.tools)} tools: {', '.join(tool_names)}")

                # Show filtering info if applied
                if enabled_toolsets:
                    print(f"   ✅ Enabled toolsets: {', '.join(enabled_toolsets)}")
                if disabled_toolsets:
                    print(f"   ❌ Disabled toolsets: {', '.join(disabled_toolsets)}")
        elif not self.quiet_mode:
            print("🛠️  No tools loaded (all tools filtered out or unavailable)")

        # Check tool requirements
        if self.tools and not self.quiet_mode:
            requirements = check_toolset_requirements()
            missing_reqs = [name for name, available in requirements.items() if not available]
            if missing_reqs:
                print(f"⚠️  Some tools may not work due to missing requirements: {missing_reqs}")

        # Show trajectory saving status
        if self.save_trajectories and not self.quiet_mode:
            print("📝 Trajectory saving enabled")

        # Show ephemeral system prompt status
        if self.ephemeral_system_prompt and not self.quiet_mode:
            prompt_preview = self.ephemeral_system_prompt[:60] + "..." if len(self.ephemeral_system_prompt) > 60 else self.ephemeral_system_prompt
            print(f"🔒 Ephemeral system prompt: '{prompt_preview}' (not saved to trajectories)")

        # Show prompt caching status
        if self._use_prompt_caching and not self.quiet_mode:
            print(f"💾 Prompt caching: ENABLED (Claude via OpenRouter, {self._cache_ttl} TTL)")

        # Session logging setup - auto-save conversation trajectories for debugging
        self.session_start = datetime.now()
        if session_id:
            # Use provided session ID (e.g., from CLI)
            self.session_id = session_id
        else:
            # Generate a new session ID
            timestamp_str = self.session_start.strftime("%Y%m%d_%H%M%S")
            short_uuid = uuid.uuid4().hex[:6]
            self.session_id = f"{timestamp_str}_{short_uuid}"

        # Session logs go into ~/.hermes/sessions/ alongside gateway sessions
        hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
        self.logs_dir = hermes_home / "sessions"
        self.logs_dir.mkdir(parents=True, exist_ok=True)
        self.session_log_file = self.logs_dir / f"session_{self.session_id}.json"

        # Track conversation messages for session logging
        self._session_messages: List[Dict[str, Any]] = []

        # Cached system prompt -- built once per session, only rebuilt on compression
        self._cached_system_prompt: Optional[str] = None

        # Filesystem checkpoint manager (transparent — not a tool)
        from tools.checkpoint_manager import CheckpointManager
        self._checkpoint_mgr = CheckpointManager(
            enabled=checkpoints_enabled,
            max_snapshots=checkpoint_max_snapshots,
        )

        # SQLite session store (optional -- provided by CLI or gateway)
        self._session_db = session_db
        self._last_flushed_db_idx = 0  # tracks DB-write cursor to prevent duplicate writes
        if self._session_db:
            try:
                self._session_db.create_session(
                    session_id=self.session_id,
                    source=self.platform or "cli",
                    model=self.model,
                    model_config={
                        "max_iterations": self.max_iterations,
                        "reasoning_config": reasoning_config,
                        "max_tokens": max_tokens,
                    },
                    user_id=None,
                )
            except Exception as e:
                logger.debug("Session DB create_session failed: %s", e)

        # In-memory todo list for task planning (one per agent/session)
        from tools.todo_tool import TodoStore
        self._todo_store = TodoStore()

        # Persistent memory (MEMORY.md + USER.md) -- loaded from disk
        self._memory_store = None
        self._memory_enabled = False
        self._user_profile_enabled = False
        self._memory_nudge_interval = 10
        self._memory_flush_min_turns = 6
        if not skip_memory:
            try:
                from hermes_cli.config import load_config as _load_mem_config
                mem_config = _load_mem_config().get("memory", {})
                self._memory_enabled = mem_config.get("memory_enabled", False)
                self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
                self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10))
                self._memory_flush_min_turns = int(mem_config.get("flush_min_turns", 6))
                if self._memory_enabled or self._user_profile_enabled:
                    from tools.memory_tool import MemoryStore
                    self._memory_store = MemoryStore(
                        memory_char_limit=mem_config.get("memory_char_limit", 2200),
                        user_char_limit=mem_config.get("user_char_limit", 1375),
                    )
                    self._memory_store.load_from_disk()
            except Exception as e:
                # Memory is optional -- don't break agent init, but leave a
                # trace so a misconfiguration can be diagnosed.
                logger.debug("Memory init failed (non-fatal): %s", e)

        # Honcho AI-native memory (cross-session user modeling)
        # Reads ~/.honcho/config.json as the single source of truth.
        self._honcho = None  # HonchoSessionManager | None
        self._honcho_session_key = honcho_session_key
        if not skip_memory:
            try:
                from honcho_integration.client import HonchoClientConfig, get_honcho_client
                hcfg = HonchoClientConfig.from_global_config()
                if hcfg.enabled and hcfg.api_key:
                    from honcho_integration.session import HonchoSessionManager
                    client = get_honcho_client(hcfg)
                    self._honcho = HonchoSessionManager(
                        honcho=client,
                        config=hcfg,
                        context_tokens=hcfg.context_tokens,
                    )
                    # Resolve session key: explicit arg > global sessions map > fallback
                    if not self._honcho_session_key:
                        self._honcho_session_key = (
                            hcfg.resolve_session_name()
                            or "hermes-default"
                        )
                    # Ensure session exists in Honcho
                    self._honcho.get_or_create(self._honcho_session_key)
                    # Inject session context into the honcho tool module
                    from tools.honcho_tools import set_session_context
                    set_session_context(self._honcho, self._honcho_session_key)
                    logger.info(
                        "Honcho active (session: %s, user: %s, workspace: %s)",
                        self._honcho_session_key, hcfg.peer_name, hcfg.workspace_id,
                    )
                else:
                    if not hcfg.enabled:
                        logger.debug("Honcho disabled in global config")
                    elif not hcfg.api_key:
                        logger.debug("Honcho enabled but no API key configured")
            except Exception as e:
                logger.debug("Honcho init failed (non-fatal): %s", e)
                self._honcho = None

        # Skills config: nudge interval for skill creation reminders
        self._skill_nudge_interval = 15
        try:
            from hermes_cli.config import load_config as _load_skills_config
            skills_config = _load_skills_config().get("skills", {})
            self._skill_nudge_interval = int(skills_config.get("creation_nudge_interval", 15))
        except Exception as e:
            # Optional config -- keep the default interval on any failure.
            logger.debug("Skills config load failed (non-fatal): %s", e)

        # Initialize context compressor for automatic context management
        # Compresses conversation when approaching model's context limit
        # Configuration via config.yaml (compression section) or environment variables
        compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85"))
        compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes")
        compression_summary_model = os.getenv("CONTEXT_COMPRESSION_MODEL") or None

        self.context_compressor = ContextCompressor(
            model=self.model,
            threshold_percent=compression_threshold,
            protect_first_n=3,
            protect_last_n=4,
            summary_target_tokens=500,
            summary_model_override=compression_summary_model,
            quiet_mode=self.quiet_mode,
            base_url=self.base_url,
        )
        self.compression_enabled = compression_enabled
        self._user_turn_count = 0

        # Cumulative token usage for the session
        self.session_prompt_tokens = 0
        self.session_completion_tokens = 0
        self.session_total_tokens = 0
        self.session_api_calls = 0

        if not self.quiet_mode:
            if compression_enabled:
                print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (compress at {int(compression_threshold*100)}% = {self.context_compressor.threshold_tokens:,})")
            else:
                print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)")
    
    def _max_tokens_param(self, value: int) -> dict:
        """Return the correct max tokens kwarg for the current provider.
        
        OpenAI's newer models (gpt-4o, o-series, gpt-5+) require
        'max_completion_tokens'. OpenRouter, local models, and older
        OpenAI models use 'max_tokens'.
        """
        _is_direct_openai = (
            "api.openai.com" in self.base_url.lower()
            and "openrouter" not in self.base_url.lower()
        )
        if _is_direct_openai:
            return {"max_completion_tokens": value}
        return {"max_tokens": value}

    def _has_content_after_think_block(self, content: str) -> bool:
        """
        Check if content has actual text after any <think></think> blocks.
        
        This detects cases where the model only outputs reasoning but no actual
        response, which indicates an incomplete generation that should be retried.
        
        Args:
            content: The assistant message content to check
            
        Returns:
            True if there's meaningful content after think blocks, False otherwise
        """
        if not content:
            return False
        
        # Remove all <think>...</think> blocks (including nested ones, non-greedy)
        cleaned = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
        
        # Check if there's any non-whitespace content remaining
        return bool(cleaned.strip())
    
    def _strip_think_blocks(self, content: str) -> str:
        """Remove <think>...</think> blocks from content, returning only visible text."""
        if not content:
            return ""
        return re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)

    def _looks_like_codex_intermediate_ack(
        self,
        user_message: str,
        assistant_content: str,
        messages: List[Dict[str, Any]],
    ) -> bool:
        """Detect a planning/ack message that should continue instead of ending the turn."""
        if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages):
            return False

        assistant_text = self._strip_think_blocks(assistant_content or "").strip().lower()
        if not assistant_text:
            return False
        if len(assistant_text) > 1200:
            return False

        has_future_ack = bool(
            re.search(r"\b(i['’]ll|i will|let me|i can do that|i can help with that)\b", assistant_text)
        )
        if not has_future_ack:
            return False

        action_markers = (
            "look into",
            "look at",
            "inspect",
            "scan",
            "check",
            "analyz",
            "review",
            "explore",
            "read",
            "open",
            "run",
            "test",
            "fix",
            "debug",
            "search",
            "find",
            "walkthrough",
            "report back",
            "summarize",
        )
        workspace_markers = (
            "directory",
            "current directory",
            "current dir",
            "cwd",
            "repo",
            "repository",
            "codebase",
            "project",
            "folder",
            "filesystem",
            "file tree",
            "files",
            "path",
        )

        user_text = (user_message or "").strip().lower()
        user_targets_workspace = (
            any(marker in user_text for marker in workspace_markers)
            or "~/" in user_text
            or "/" in user_text
        )
        assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
        assistant_targets_workspace = any(
            marker in assistant_text for marker in workspace_markers
        )
        return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action
    
    
    def _extract_reasoning(self, assistant_message) -> Optional[str]:
        """
        Extract reasoning/thinking content from an assistant message.
        
        OpenRouter and various providers can return reasoning in multiple formats:
        1. message.reasoning - Direct reasoning field (DeepSeek, Qwen, etc.)
        2. message.reasoning_content - Alternative field (Moonshot AI, Novita, etc.)
        3. message.reasoning_details - Array of {type, summary, ...} objects (OpenRouter unified)
        
        Args:
            assistant_message: The assistant message object from the API response
            
        Returns:
            Combined reasoning text, or None if no reasoning found
        """
        reasoning_parts = []
        
        # Check direct reasoning field
        if hasattr(assistant_message, 'reasoning') and assistant_message.reasoning:
            reasoning_parts.append(assistant_message.reasoning)
        
        # Check reasoning_content field (alternative name used by some providers)
        if hasattr(assistant_message, 'reasoning_content') and assistant_message.reasoning_content:
            # Don't duplicate if same as reasoning
            if assistant_message.reasoning_content not in reasoning_parts:
                reasoning_parts.append(assistant_message.reasoning_content)
        
        # Check reasoning_details array (OpenRouter unified format)
        # Format: [{"type": "reasoning.summary", "summary": "...", ...}, ...]
        if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
            for detail in assistant_message.reasoning_details:
                if isinstance(detail, dict):
                    # Extract summary from reasoning detail object
                    summary = detail.get('summary') or detail.get('content') or detail.get('text')
                    if summary and summary not in reasoning_parts:
                        reasoning_parts.append(summary)
        
        # Combine all reasoning parts
        if reasoning_parts:
            return "\n\n".join(reasoning_parts)
        
        return None
    
    def _cleanup_task_resources(self, task_id: str) -> None:
        """Clean up VM and browser resources for a given task."""
        try:
            cleanup_vm(task_id)
        except Exception as e:
            if self.verbose_logging:
                logging.warning(f"Failed to cleanup VM for task {task_id}: {e}")
        try:
            cleanup_browser(task_id)
        except Exception as e:
            if self.verbose_logging:
                logging.warning(f"Failed to cleanup browser for task {task_id}: {e}")

    def _persist_session(self, messages: List[Dict], conversation_history: Optional[List[Dict]] = None) -> None:
        """Save session state to both JSON log and SQLite on any exit path.

        Ensures conversations are never lost, even on errors or early returns.

        Args:
            messages: The full message list for the session so far.
            conversation_history: Messages that pre-date this run, passed
                through unchanged to the session-DB flush.
        """
        # Record the snapshot first: _save_session_log falls back to
        # self._session_messages when it is handed an empty/None list.
        self._session_messages = messages
        self._save_session_log(messages)
        self._flush_messages_to_session_db(messages, conversation_history)

    def _flush_messages_to_session_db(self, messages: List[Dict], conversation_history: List[Dict] = None):
        """Persist any un-flushed messages to the SQLite session store.

        Uses _last_flushed_db_idx to track which messages have already been
        written, so repeated calls (from multiple exit paths) only write
        truly new messages — preventing the duplicate-write bug (#860).
        """
        if not self._session_db:
            return
        try:
            start_idx = len(conversation_history) if conversation_history else 0
            flush_from = max(start_idx, self._last_flushed_db_idx)
            for msg in messages[flush_from:]:
                role = msg.get("role", "unknown")
                content = msg.get("content")
                tool_calls_data = None
                if hasattr(msg, "tool_calls") and msg.tool_calls:
                    tool_calls_data = [
                        {"name": tc.function.name, "arguments": tc.function.arguments}
                        for tc in msg.tool_calls
                    ]
                elif isinstance(msg.get("tool_calls"), list):
                    tool_calls_data = msg["tool_calls"]
                self._session_db.append_message(
                    session_id=self.session_id,
                    role=role,
                    content=content,
                    tool_name=msg.get("tool_name"),
                    tool_calls=tool_calls_data,
                    tool_call_id=msg.get("tool_call_id"),
                    finish_reason=msg.get("finish_reason"),
                )
            self._last_flushed_db_idx = len(messages)
        except Exception as e:
            logger.debug("Session DB append_message failed: %s", e)

    def _get_messages_up_to_last_assistant(self, messages: List[Dict]) -> List[Dict]:
        """
        Get messages up to (but not including) the last assistant turn.
        
        This is used when we need to "roll back" to the last successful point
        in the conversation, typically when the final assistant message is
        incomplete or malformed.
        
        Args:
            messages: Full message list
            
        Returns:
            Messages up to the last complete assistant turn (ending with user/tool message)
        """
        if not messages:
            return []
        
        # Find the index of the last assistant message
        last_assistant_idx = None
        for i in range(len(messages) - 1, -1, -1):
            if messages[i].get("role") == "assistant":
                last_assistant_idx = i
                break
        
        if last_assistant_idx is None:
            # No assistant message found, return all messages
            return messages.copy()
        
        # Return everything up to (not including) the last assistant message
        return messages[:last_assistant_idx]
    
    def _format_tools_for_system_message(self) -> str:
        """
        Format tool definitions for the system message in the trajectory format.
        
        Returns:
            str: JSON string representation of tool definitions
        """
        if not self.tools:
            return "[]"
        
        # Convert tool definitions to the format expected in trajectories
        formatted_tools = []
        for tool in self.tools:
            func = tool["function"]
            formatted_tool = {
                "name": func["name"],
                "description": func.get("description", ""),
                "parameters": func.get("parameters", {}),
                "required": None  # Match the format in the example
            }
            formatted_tools.append(formatted_tool)
        
        return json.dumps(formatted_tools, ensure_ascii=False)
    
    def _convert_to_trajectory_format(self, messages: List[Dict[str, Any]], user_query: str, completed: bool) -> List[Dict[str, Any]]:
        """
        Convert internal message format to trajectory format for saving.
        
        Args:
            messages (List[Dict]): Internal message history
            user_query (str): Original user query
            completed (bool): Whether the conversation completed successfully
            
        Returns:
            List[Dict]: Messages in trajectory format
        """
        trajectory = []
        
        # Add system message with tool definitions
        system_msg = (
            "You are a function calling AI model. You are provided with function signatures within <tools> </tools> XML tags. "
            "You may call one or more functions to assist with the user query. If available tools are not relevant in assisting "
            "with user query, just respond in natural conversational language. Don't make assumptions about what values to plug "
            "into functions. After calling & executing the functions, you will be provided with function results within "
            "<tool_response> </tool_response> XML tags. Here are the available tools:\n"
            f"<tools>\n{self._format_tools_for_system_message()}\n</tools>\n"
            "For each function call return a JSON object, with the following pydantic model json schema for each:\n"
            "{'title': 'FunctionCall', 'type': 'object', 'properties': {'name': {'title': 'Name', 'type': 'string'}, "
            "'arguments': {'title': 'Arguments', 'type': 'object'}}, 'required': ['name', 'arguments']}\n"
            "Each function call should be enclosed within <tool_call> </tool_call> XML tags.\n"
            "Example:\n<tool_call>\n{'name': <function-name>,'arguments': <args-dict>}\n</tool_call>"
        )
        
        trajectory.append({
            "from": "system",
            "value": system_msg
        })
        
        # Add the actual user prompt (from the dataset) as the first human message
        trajectory.append({
            "from": "human",
            "value": user_query
        })
        
        # Skip the first message (the user query) since we already added it above.
        # Prefill messages are injected at API-call time only (not in the messages
        # list), so no offset adjustment is needed here.
        i = 1
        
        while i < len(messages):
            msg = messages[i]
            
            if msg["role"] == "assistant":
                # Check if this message has tool calls
                if "tool_calls" in msg and msg["tool_calls"]:
                    # Format assistant message with tool calls
                    # Add <think> tags around reasoning for trajectory storage
                    content = ""
                    
                    # Prepend reasoning in <think> tags if available (native thinking tokens)
                    if msg.get("reasoning") and msg["reasoning"].strip():
                        content = f"<think>\n{msg['reasoning']}\n</think>\n"
                    
                    if msg.get("content") and msg["content"].strip():
                        # Convert any <REASONING_SCRATCHPAD> tags to <think> tags
                        # (used when native thinking is disabled and model reasons via XML)
                        content += convert_scratchpad_to_think(msg["content"]) + "\n"
                    
                    # Add tool calls wrapped in XML tags
                    for tool_call in msg["tool_calls"]:
                        # Parse arguments - should always succeed since we validate during conversation
                        # but keep try-except as safety net
                        try:
                            arguments = json.loads(tool_call["function"]["arguments"]) if isinstance(tool_call["function"]["arguments"], str) else tool_call["function"]["arguments"]
                        except json.JSONDecodeError:
                            # This shouldn't happen since we validate and retry during conversation,
                            # but if it does, log warning and use empty dict
                            logging.warning(f"Unexpected invalid JSON in trajectory conversion: {tool_call['function']['arguments'][:100]}")
                            arguments = {}
                        
                        tool_call_json = {
                            "name": tool_call["function"]["name"],
                            "arguments": arguments
                        }
                        content += f"<tool_call>\n{json.dumps(tool_call_json, ensure_ascii=False)}\n</tool_call>\n"
                    
                    # Ensure every gpt turn has a <think> block (empty if no reasoning)
                    # so the format is consistent for training data
                    if "<think>" not in content:
                        content = "<think>\n</think>\n" + content
                    
                    trajectory.append({
                        "from": "gpt",
                        "value": content.rstrip()
                    })
                    
                    # Collect all subsequent tool responses
                    tool_responses = []
                    j = i + 1
                    while j < len(messages) and messages[j]["role"] == "tool":
                        tool_msg = messages[j]
                        # Format tool response with XML tags
                        tool_response = f"<tool_response>\n"
                        
                        # Try to parse tool content as JSON if it looks like JSON
                        tool_content = tool_msg["content"]
                        try:
                            if tool_content.strip().startswith(("{", "[")):
                                tool_content = json.loads(tool_content)
                        except (json.JSONDecodeError, AttributeError):
                            pass  # Keep as string if not valid JSON
                        
                        tool_response += json.dumps({
                            "tool_call_id": tool_msg.get("tool_call_id", ""),
                            "name": msg["tool_calls"][len(tool_responses)]["function"]["name"] if len(tool_responses) < len(msg["tool_calls"]) else "unknown",
                            "content": tool_content
                        }, ensure_ascii=False)
                        tool_response += "\n</tool_response>"
                        tool_responses.append(tool_response)
                        j += 1
                    
                    # Add all tool responses as a single message
                    if tool_responses:
                        trajectory.append({
                            "from": "tool",
                            "value": "\n".join(tool_responses)
                        })
                        i = j - 1  # Skip the tool messages we just processed
                
                else:
                    # Regular assistant message without tool calls
                    # Add <think> tags around reasoning for trajectory storage
                    content = ""
                    
                    # Prepend reasoning in <think> tags if available (native thinking tokens)
                    if msg.get("reasoning") and msg["reasoning"].strip():
                        content = f"<think>\n{msg['reasoning']}\n</think>\n"
                    
                    # Convert any <REASONING_SCRATCHPAD> tags to <think> tags
                    # (used when native thinking is disabled and model reasons via XML)
                    raw_content = msg["content"] or ""
                    content += convert_scratchpad_to_think(raw_content)
                    
                    # Ensure every gpt turn has a <think> block (empty if no reasoning)
                    if "<think>" not in content:
                        content = "<think>\n</think>\n" + content
                    
                    trajectory.append({
                        "from": "gpt",
                        "value": content.strip()
                    })
            
            elif msg["role"] == "user":
                trajectory.append({
                    "from": "human",
                    "value": msg["content"]
                })
            
            i += 1
        
        return trajectory
    
    def _save_trajectory(self, messages: List[Dict[str, Any]], user_query: str, completed: bool):
        """
        Save conversation trajectory to JSONL file.
        
        Args:
            messages (List[Dict]): Complete message history
            user_query (str): Original user query
            completed (bool): Whether the conversation completed successfully
        """
        if not self.save_trajectories:
            return
        
        trajectory = self._convert_to_trajectory_format(messages, user_query, completed)
        _save_trajectory_to_file(trajectory, self.model, completed)
    
    def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]:
        if not key:
            return None
        if len(key) <= 12:
            return "***"
        return f"{key[:8]}...{key[-4:]}"

    def _dump_api_request_debug(
        self,
        api_kwargs: Dict[str, Any],
        *,
        reason: str,
        error: Optional[Exception] = None,
    ) -> Optional[Path]:
        """
        Dump a debug-friendly HTTP request record for chat.completions.create().

        Captures the request body from api_kwargs (excluding transport-only keys
        like timeout). Intended for debugging provider-side 4xx failures where
        retries are not useful.
        """
        try:
            body = copy.deepcopy(api_kwargs)
            body.pop("timeout", None)
            body = {k: v for k, v in body.items() if v is not None}

            api_key = None
            try:
                api_key = getattr(self.client, "api_key", None)
            except Exception as e:
                logger.debug("Could not extract API key for debug dump: %s", e)

            dump_payload: Dict[str, Any] = {
                "timestamp": datetime.now().isoformat(),
                "session_id": self.session_id,
                "reason": reason,
                "request": {
                    "method": "POST",
                    "url": f"{self.base_url.rstrip('/')}/chat/completions",
                    "headers": {
                        "Authorization": f"Bearer {self._mask_api_key_for_logs(api_key)}",
                        "Content-Type": "application/json",
                    },
                    "body": body,
                },
            }

            if error is not None:
                error_info: Dict[str, Any] = {
                    "type": type(error).__name__,
                    "message": str(error),
                }
                for attr_name in ("status_code", "request_id", "code", "param", "type"):
                    attr_value = getattr(error, attr_name, None)
                    if attr_value is not None:
                        error_info[attr_name] = attr_value

                body_attr = getattr(error, "body", None)
                if body_attr is not None:
                    error_info["body"] = body_attr

                response_obj = getattr(error, "response", None)
                if response_obj is not None:
                    try:
                        error_info["response_status"] = getattr(response_obj, "status_code", None)
                        error_info["response_text"] = response_obj.text
                    except Exception as e:
                        logger.debug("Could not extract error response details: %s", e)

                dump_payload["error"] = error_info

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            dump_file = self.logs_dir / f"request_dump_{self.session_id}_{timestamp}.json"
            dump_file.write_text(
                json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str),
                encoding="utf-8",
            )

            print(f"{self.log_prefix}🧾 Request debug dump written to: {dump_file}")

            if os.getenv("HERMES_DUMP_REQUEST_STDOUT", "").strip().lower() in {"1", "true", "yes", "on"}:
                print(json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str))

            return dump_file
        except Exception as dump_error:
            if self.verbose_logging:
                logging.warning(f"Failed to dump API request debug payload: {dump_error}")
            return None

    @staticmethod
    def _clean_session_content(content: str) -> str:
        """Normalize assistant text for the session log.

        REASONING_SCRATCHPAD markup is converted to <think> tags, runs of
        newlines directly before ``<think>`` or directly after ``</think>``
        are collapsed to a single newline, and the result is stripped.
        Falsy input is returned unchanged.
        """
        if not content:
            return content
        converted = convert_scratchpad_to_think(content)
        for pattern, replacement in (
            (r'\n+(<think>)', r'\n\1'),
            (r'(</think>)\n+', r'\1\n'),
        ):
            converted = re.sub(pattern, replacement, converted)
        return converted.strip()

    def _save_session_log(self, messages: List[Dict[str, Any]] = None):
        """
        Save the full raw session to a JSON file.

        Stores every message exactly as the agent sees it: user messages,
        assistant messages (with reasoning, finish_reason, tool_calls),
        tool responses (with tool_call_id, tool_name), and injected system
        messages (compression summaries, todo snapshots, etc.).

        REASONING_SCRATCHPAD tags are converted to <think> blocks for consistency.
        Overwritten after each turn so it always reflects the latest state.
        """
        messages = messages or self._session_messages
        if not messages:
            return

        try:
            # Clean assistant content for session logs
            cleaned = []
            for msg in messages:
                if msg.get("role") == "assistant" and msg.get("content"):
                    msg = dict(msg)
                    msg["content"] = self._clean_session_content(msg["content"])
                cleaned.append(msg)

            entry = {
                "session_id": self.session_id,
                "model": self.model,
                "base_url": self.base_url,
                "platform": self.platform,
                "session_start": self.session_start.isoformat(),
                "last_updated": datetime.now().isoformat(),
                "system_prompt": self._cached_system_prompt or "",
                "tools": self.tools or [],
                "message_count": len(cleaned),
                "messages": cleaned,
            }

            with open(self.session_log_file, "w", encoding="utf-8") as f:
                json.dump(entry, f, indent=2, ensure_ascii=False, default=str)

        except Exception as e:
            if self.verbose_logging:
                logging.warning(f"Failed to save session log: {e}")
    
    def interrupt(self, message: str = None) -> None:
        """
        Ask the agent to stop its current tool-calling loop.

        Intended to be called from a different thread than the one running
        the agent (e.g. an input handler or message receiver). The request is
        recorded on this agent, the global tool interrupt signal is raised so
        in-flight tool executions can bail out, and the request is forwarded
        to every active child agent.

        Args:
            message: Optional new message that triggered the interrupt.
                     It is stored alongside the request and forwarded to children.

        Example (CLI):
            # In a separate input thread:
            if user_typed_something:
                agent.interrupt(user_input)

        Example (Messaging):
            # When new message arrives for active session:
            if session_has_running_agent:
                running_agent.interrupt(new_message.text)
        """
        self._interrupt_requested = True
        self._interrupt_message = message

        # Tell all tools to abort whatever they are doing right now.
        _set_interrupt(True)

        # Forward to delegated sub-agents; one failing child must not block the rest.
        for child in self._active_children:
            try:
                child.interrupt(message)
            except Exception as e:
                logger.debug("Failed to propagate interrupt to child agent: %s", e)

        if self.quiet_mode:
            return

        # Show at most the first 40 characters of the triggering message.
        if message and len(message) > 40:
            detail = f": '{message[:40]}...'"
        elif message:
            detail = f": '{message}'"
        else:
            detail = ""
        print(f"\n⚡ Interrupt requested" + detail)
    
    def clear_interrupt(self) -> None:
        """Reset this agent's interrupt request/message and lower the global tool interrupt signal."""
        self._interrupt_requested, self._interrupt_message = False, None
        _set_interrupt(False)
    
    def _hydrate_todo_store(self, history: List[Dict[str, Any]]) -> None:
        """
        Recover todo state from conversation history.
        
        The gateway creates a fresh AIAgent per message, so the in-memory
        TodoStore is empty. We scan the history for the most recent todo
        tool response and replay it to reconstruct the state.
        """
        # Walk history backwards to find the most recent todo tool response
        last_todo_response = None
        for msg in reversed(history):
            if msg.get("role") != "tool":
                continue
            content = msg.get("content", "")
            # Quick check: todo responses contain "todos" key
            if '"todos"' not in content:
                continue
            try:
                data = json.loads(content)
                if "todos" in data and isinstance(data["todos"], list):
                    last_todo_response = data["todos"]
                    break
            except (json.JSONDecodeError, TypeError):
                continue
        
        if last_todo_response:
            # Replay the items into the store (replace mode)
            self._todo_store.write(last_todo_response, merge=False)
            if not self.quiet_mode:
                print(f"{self.log_prefix}📋 Restored {len(last_todo_response)} todo item(s) from history")
        _set_interrupt(False)
    
    @property
    def is_interrupted(self) -> bool:
        """Whether an interrupt is currently pending.

        Reflects ``self._interrupt_requested``, which ``interrupt()`` sets and
        ``clear_interrupt()`` resets.
        """
        return self._interrupt_requested

    # ── Honcho integration helpers ──

    def _honcho_prefetch(self, user_message: str) -> str:
        """Fetch user context from Honcho for system prompt injection.

        Returns a formatted context block, or empty string if unavailable.
        """
        if not self._honcho or not self._honcho_session_key:
            return ""
        try:
            ctx = self._honcho.get_prefetch_context(self._honcho_session_key, user_message)
            if not ctx:
                return ""
            parts = []
            rep = ctx.get("representation", "")
            card = ctx.get("card", "")
            if rep:
                parts.append(rep)
            if card:
                parts.append(card)
            if not parts:
                return ""
            return "# Honcho User Context\n" + "\n\n".join(parts)
        except Exception as e:
            logger.debug("Honcho prefetch failed (non-fatal): %s", e)
            return ""

    def _honcho_save_user_observation(self, content: str) -> str:
        """Route a memory tool target=user add to Honcho.

        Sends the content as a user peer message so Honcho's reasoning
        model can incorporate it into the user representation.
        """
        if not content or not content.strip():
            return json.dumps({"success": False, "error": "Content cannot be empty."})
        try:
            session = self._honcho.get_or_create(self._honcho_session_key)
            session.add_message("user", f"[observation] {content.strip()}")
            self._honcho.save(session)
            return json.dumps({
                "success": True,
                "target": "user",
                "message": "Saved to Honcho user model.",
            })
        except Exception as e:
            logger.debug("Honcho user observation failed: %s", e)
            return json.dumps({"success": False, "error": f"Honcho save failed: {e}"})

    def _honcho_sync(self, user_content: str, assistant_content: str) -> None:
        """Sync the user/assistant message pair to Honcho."""
        if not self._honcho or not self._honcho_session_key:
            return
        try:
            session = self._honcho.get_or_create(self._honcho_session_key)
            session.add_message("user", user_content)
            session.add_message("assistant", assistant_content)
            self._honcho.save(session)
        except Exception as e:
            logger.debug("Honcho sync failed (non-fatal): %s", e)

    def _build_system_prompt(self, system_message: str = None) -> str:
        """
        Compose the system prompt by stacking its layers in a fixed order.

        Layers, in the order they are appended (each is skipped when empty
        or not applicable):
          1. Default agent identity (always present)
          2. Behavioral guidance for whichever of the memory, session_search
             and skill_manage tools are loaded (joined with single spaces)
          3. Caller-supplied system message (when not None)
          4. Memory block, then user-profile block, from the memory store
             (each gated by its own enable flag)
          5. Skills prompt (only when a skills tool is loaded)
          6. Context files prompt (unless skip_context_files is set)
          7. "Conversation started" timestamp, taken when this method runs
          8. Platform-specific hint (when the platform has one)

        The ephemeral system prompt is deliberately NOT part of this string;
        it is injected at API-call time so it stays out of the stored prompt.

        Returns:
            All layers joined by blank lines.
        """
        sections = [DEFAULT_AGENT_IDENTITY]
        loaded_tools = self.valid_tool_names

        # Tool-aware guidance: only mention tools that are actually available.
        guidance = []
        if "memory" in loaded_tools:
            guidance.append(MEMORY_GUIDANCE)
        if "session_search" in loaded_tools:
            guidance.append(SESSION_SEARCH_GUIDANCE)
        if "skill_manage" in loaded_tools:
            guidance.append(SKILLS_GUIDANCE)
        if guidance:
            sections.append(" ".join(guidance))

        if system_message is not None:
            sections.append(system_message)

        store = self._memory_store
        if store:
            if self._memory_enabled and (memory_block := store.format_for_system_prompt("memory")):
                sections.append(memory_block)
            # The user profile is included whenever it is enabled; Honcho
            # prefetch is additive and does not replace it.
            if self._user_profile_enabled and (profile_block := store.format_for_system_prompt("user")):
                sections.append(profile_block)

        if any(tool in loaded_tools for tool in ('skills_list', 'skill_view', 'skill_manage')):
            skills_prompt = build_skills_system_prompt()
            if skills_prompt:
                sections.append(skills_prompt)

        if not self.skip_context_files:
            context_files_prompt = build_context_files_prompt()
            if context_files_prompt:
                sections.append(context_files_prompt)

        from hermes_time import now as _hermes_now
        started_at = _hermes_now()
        sections.append(
            f"Conversation started: {started_at.strftime('%A, %B %d, %Y %I:%M %p')}"
        )

        platform_key = (self.platform or "").lower().strip()
        if platform_key in PLATFORM_HINTS:
            sections.append(PLATFORM_HINTS[platform_key])

        return "\n\n".join(sections)
    
    def _repair_tool_call(self, tool_name: str) -> str | None:
        """Attempt to repair a mismatched tool name before aborting.

        1. Try lowercase
        2. Try normalized (lowercase + hyphens/spaces -> underscores)
        3. Try fuzzy match (difflib, cutoff=0.7)

        Returns the repaired name if found in valid_tool_names, else None.
        """
        from difflib import get_close_matches

        # 1. Lowercase
        lowered = tool_name.lower()
        if lowered in self.valid_tool_names:
            return lowered

        # 2. Normalize
        normalized = lowered.replace("-", "_").replace(" ", "_")
        if normalized in self.valid_tool_names:
            return normalized

        # 3. Fuzzy match
        matches = get_close_matches(lowered, self.valid_tool_names, n=1, cutoff=0.7)
        if matches:
            return matches[0]

        return None

    def _invalidate_system_prompt(self):
        """
        Invalidate the cached system prompt, forcing a rebuild on the next turn.
        
        Called after context compression events. Also reloads memory from disk
        so the rebuilt prompt captures any writes from this session.
        """
        self._cached_system_prompt = None
        if self._memory_store:
            self._memory_store.load_from_disk()

    def _responses_tools(self, tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
        """Convert chat-completions tool schemas to Responses function-tool schemas."""
        source_tools = tools if tools is not None else self.tools
        if not source_tools:
            return None

        converted: List[Dict[str, Any]] = []
        for item in source_tools:
            fn = item.get("function", {}) if isinstance(item, dict) else {}
            name = fn.get("name")
            if not isinstance(name, str) or not name.strip():
                continue
            converted.append({
                "type": "function",
                "name": name,
                "description": fn.get("description", ""),
                "strict": False,
                "parameters": fn.get("parameters", {"type": "object", "properties": {}}),
            })
        return converted or None

    @staticmethod
    def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]:
        """Split a stored tool id into (call_id, response_item_id)."""
        if not isinstance(raw_id, str):
            return None, None
        value = raw_id.strip()
        if not value:
            return None, None
        if "|" in value:
            call_id, response_item_id = value.split("|", 1)
            call_id = call_id.strip() or None
            response_item_id = response_item_id.strip() or None
            return call_id, response_item_id
        if value.startswith("fc_"):
            return None, value
        return value, None

    def _derive_responses_function_call_id(
        self,
        call_id: str,
        response_item_id: Optional[str] = None,
    ) -> str:
        """Build a valid Responses `function_call.id` (must start with `fc_`)."""
        if isinstance(response_item_id, str):
            candidate = response_item_id.strip()
            if candidate.startswith("fc_"):
                return candidate

        source = (call_id or "").strip()
        if source.startswith("fc_"):
            return source
        if source.startswith("call_") and len(source) > len("call_"):
            return f"fc_{source[len('call_'):]}"

        sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source)
        if sanitized.startswith("fc_"):
            return sanitized
        if sanitized.startswith("call_") and len(sanitized) > len("call_"):
            return f"fc_{sanitized[len('call_'):]}"
        if sanitized:
            return f"fc_{sanitized[:48]}"

        seed = source or str(response_item_id or "") or uuid.uuid4().hex
        digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24]
        return f"fc_{digest}"

    def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Convert internal chat-style messages to Responses input items."""
        items: List[Dict[str, Any]] = []

        for msg in messages:
            if not isinstance(msg, dict):
                continue
            role = msg.get("role")
            if role == "system":
                continue

            if role in {"user", "assistant"}:
                content = msg.get("content", "")
                content_text = str(content) if content is not None else ""

                if role == "assistant":
                    # Replay encrypted reasoning items from previous turns
                    # so the API can maintain coherent reasoning chains.
                    codex_reasoning = msg.get("codex_reasoning_items")
                    if isinstance(codex_reasoning, list):
                        for ri in codex_reasoning:
                            if isinstance(ri, dict) and ri.get("encrypted_content"):
                                items.append(ri)

                    if content_text.strip():
                        items.append({"role": "assistant", "content": content_text})

                    tool_calls = msg.get("tool_calls")
                    if isinstance(tool_calls, list):
                        for tc in tool_calls:
                            if not isinstance(tc, dict):
                                continue
                            fn = tc.get("function", {})
                            fn_name = fn.get("name")
                            if not isinstance(fn_name, str) or not fn_name.strip():
                                continue

                            embedded_call_id, embedded_response_item_id = self._split_responses_tool_id(
                                tc.get("id")
                            )
                            call_id = tc.get("call_id")
                            if not isinstance(call_id, str) or not call_id.strip():
                                call_id = embedded_call_id
                            if not isinstance(call_id, str) or not call_id.strip():
                                if (
                                    isinstance(embedded_response_item_id, str)
                                    and embedded_response_item_id.startswith("fc_")
                                    and len(embedded_response_item_id) > len("fc_")
                                ):
                                    call_id = f"call_{embedded_response_item_id[len('fc_'):]}"
                                else:
                                    call_id = f"call_{uuid.uuid4().hex[:12]}"
                            call_id = call_id.strip()

                            arguments = fn.get("arguments", "{}")
                            if isinstance(arguments, dict):
                                arguments = json.dumps(arguments, ensure_ascii=False)
                            elif not isinstance(arguments, str):
                                arguments = str(arguments)
                            arguments = arguments.strip() or "{}"

                            items.append({
                                "type": "function_call",
                                "call_id": call_id,
                                "name": fn_name,
                                "arguments": arguments,
                            })
                    continue

                items.append({"role": role, "content": content_text})
                continue

            if role == "tool":
                raw_tool_call_id = msg.get("tool_call_id")
                call_id, _ = self._split_responses_tool_id(raw_tool_call_id)
                if not isinstance(call_id, str) or not call_id.strip():
                    if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip():
                        call_id = raw_tool_call_id.strip()
                if not isinstance(call_id, str) or not call_id.strip():
                    continue
                items.append({
                    "type": "function_call_output",
                    "call_id": call_id,
                    "output": str(msg.get("content", "") or ""),
                })

        return items

    def _preflight_codex_input_items(self, raw_items: Any) -> List[Dict[str, Any]]:
        if not isinstance(raw_items, list):
            raise ValueError("Codex Responses input must be a list of input items.")

        normalized: List[Dict[str, Any]] = []
        for idx, item in enumerate(raw_items):
            if not isinstance(item, dict):
                raise ValueError(f"Codex Responses input[{idx}] must be an object.")

            item_type = item.get("type")
            if item_type == "function_call":
                call_id = item.get("call_id")
                name = item.get("name")
                if not isinstance(call_id, str) or not call_id.strip():
                    raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.")
                if not isinstance(name, str) or not name.strip():
                    raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.")

                arguments = item.get("arguments", "{}")
                if isinstance(arguments, dict):
                    arguments = json.dumps(arguments, ensure_ascii=False)
                elif not isinstance(arguments, str):
                    arguments = str(arguments)
                arguments = arguments.strip() or "{}"

                normalized.append(
                    {
                        "type": "function_call",
                        "call_id": call_id.strip(),
                        "name": name.strip(),
                        "arguments": arguments,
                    }
                )
                continue

            if item_type == "function_call_output":
                call_id = item.get("call_id")
                if not isinstance(call_id, str) or not call_id.strip():
                    raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.")
                output = item.get("output", "")
                if output is None:
                    output = ""
                if not isinstance(output, str):
                    output = str(output)

                normalized.append(
                    {
                        "type": "function_call_output",
                        "call_id": call_id.strip(),
                        "output": output,
                    }
                )
                continue

            if item_type == "reasoning":
                encrypted = item.get("encrypted_content")
                if isinstance(encrypted, str) and encrypted:
                    reasoning_item = {"type": "reasoning", "encrypted_content": encrypted}
                    item_id = item.get("id")
                    if isinstance(item_id, str) and item_id:
                        reasoning_item["id"] = item_id
                    summary = item.get("summary")
                    if isinstance(summary, list):
                        reasoning_item["summary"] = summary
                    else:
                        reasoning_item["summary"] = []
                    normalized.append(reasoning_item)
                continue

            role = item.get("role")
            if role in {"user", "assistant"}:
                content = item.get("content", "")
                if content is None:
                    content = ""
                if not isinstance(content, str):
                    content = str(content)

                normalized.append({"role": role, "content": content})
                continue

            raise ValueError(
                f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})."
            )

        return normalized

    def _preflight_codex_api_kwargs(
        self,
        api_kwargs: Any,
        *,
        allow_stream: bool = False,
    ) -> Dict[str, Any]:
        if not isinstance(api_kwargs, dict):
            raise ValueError("Codex Responses request must be a dict.")

        required = {"model", "instructions", "input"}
        missing = [key for key in required if key not in api_kwargs]
        if missing:
            raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.")

        model = api_kwargs.get("model")
        if not isinstance(model, str) or not model.strip():
            raise ValueError("Codex Responses request 'model' must be a non-empty string.")
        model = model.strip()

        instructions = api_kwargs.get("instructions")
        if instructions is None:
            instructions = ""
        if not isinstance(instructions, str):
            instructions = str(instructions)
        instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY

        normalized_input = self._preflight_codex_input_items(api_kwargs.get("input"))

        tools = api_kwargs.get("tools")
        normalized_tools = None
        if tools is not None:
            if not isinstance(tools, list):
                raise ValueError("Codex Responses request 'tools' must be a list when provided.")
            normalized_tools = []
            for idx, tool in enumerate(tools):
                if not isinstance(tool, dict):
                    raise ValueError(f"Codex Responses tools[{idx}] must be an object.")
                if tool.get("type") != "function":
                    raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.")

                name = tool.get("name")
                parameters = tool.get("parameters")
                if not isinstance(name, str) or not name.strip():
                    raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.")
                if not isinstance(parameters, dict):
                    raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.")

                description = tool.get("description", "")
                if description is None:
                    description = ""
                if not isinstance(description, str):
                    description = str(description)

                strict = tool.get("strict", False)
                if not isinstance(strict, bool):
                    strict = bool(strict)

                normalized_tools.append(
                    {
                        "type": "function",
                        "name": name.strip(),
                        "description": description,
                        "strict": strict,
                        "parameters": parameters,
                    }
                )

        store = api_kwargs.get("store", False)
        if store is not False:
            raise ValueError("Codex Responses contract requires 'store' to be false.")

        allowed_keys = {
            "model", "instructions", "input", "tools", "store",
            "reasoning", "include", "max_output_tokens", "temperature",
            "tool_choice", "parallel_tool_calls", "prompt_cache_key",
        }
        normalized: Dict[str, Any] = {
            "model": model,
            "instructions": instructions,
            "input": normalized_input,
            "tools": normalized_tools,
            "store": False,
        }

        # Pass through reasoning config
        reasoning = api_kwargs.get("reasoning")
        if isinstance(reasoning, dict):
            normalized["reasoning"] = reasoning
        include = api_kwargs.get("include")
        if isinstance(include, list):
            normalized["include"] = include

        # Pass through max_output_tokens and temperature
        max_output_tokens = api_kwargs.get("max_output_tokens")
        if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0:
            normalized["max_output_tokens"] = int(max_output_tokens)
        temperature = api_kwargs.get("temperature")
        if isinstance(temperature, (int, float)):
            normalized["temperature"] = float(temperature)

        # Pass through tool_choice, parallel_tool_calls, prompt_cache_key
        for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"):
            val = api_kwargs.get(passthrough_key)
            if val is not None:
                normalized[passthrough_key] = val

        if allow_stream:
            stream = api_kwargs.get("stream")
            if stream is not None and stream is not True:
                raise ValueError("Codex Responses 'stream' must be true when set.")
            if stream is True:
                normalized["stream"] = True
            allowed_keys.add("stream")
        elif "stream" in api_kwargs:
            raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.")

        unexpected = sorted(key for key in api_kwargs.keys() if key not in allowed_keys)
        if unexpected:
            raise ValueError(
                f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}."
            )

        return normalized

    def _extract_responses_message_text(self, item: Any) -> str:
        """Extract assistant text from a Responses message output item."""
        content = getattr(item, "content", None)
        if not isinstance(content, list):
            return ""

        chunks: List[str] = []
        for part in content:
            ptype = getattr(part, "type", None)
            if ptype not in {"output_text", "text"}:
                continue
            text = getattr(part, "text", None)
            if isinstance(text, str) and text:
                chunks.append(text)
        return "".join(chunks).strip()

    def _extract_responses_reasoning_text(self, item: Any) -> str:
        """Extract a compact reasoning text from a Responses reasoning item."""
        summary = getattr(item, "summary", None)
        if isinstance(summary, list):
            chunks: List[str] = []
            for part in summary:
                text = getattr(part, "text", None)
                if isinstance(text, str) and text:
                    chunks.append(text)
            if chunks:
                return "\n".join(chunks).strip()
        text = getattr(item, "text", None)
        if isinstance(text, str) and text:
            return text.strip()
        return ""

    def _normalize_codex_response(self, response: Any) -> tuple[Any, str]:
        """Normalize a Responses API object to an assistant_message-like object."""
        output = getattr(response, "output", None)
        if not isinstance(output, list) or not output:
            raise RuntimeError("Responses API returned no output items")

        response_status = getattr(response, "status", None)
        if isinstance(response_status, str):
            response_status = response_status.strip().lower()
        else:
            response_status = None

        if response_status in {"failed", "cancelled"}:
            error_obj = getattr(response, "error", None)
            if isinstance(error_obj, dict):
                error_msg = error_obj.get("message") or str(error_obj)
            else:
                error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'"
            raise RuntimeError(error_msg)

        content_parts: List[str] = []
        reasoning_parts: List[str] = []
        reasoning_items_raw: List[Dict[str, Any]] = []
        tool_calls: List[Any] = []
        has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
        saw_commentary_phase = False
        saw_final_answer_phase = False

        for item in output:
            item_type = getattr(item, "type", None)
            item_status = getattr(item, "status", None)
            if isinstance(item_status, str):
                item_status = item_status.strip().lower()
            else:
                item_status = None

            if item_status in {"queued", "in_progress", "incomplete"}:
                has_incomplete_items = True

            if item_type == "message":
                item_phase = getattr(item, "phase", None)
                if isinstance(item_phase, str):
                    normalized_phase = item_phase.strip().lower()
                    if normalized_phase in {"commentary", "analysis"}:
                        saw_commentary_phase = True
                    elif normalized_phase in {"final_answer", "final"}:
                        saw_final_answer_phase = True
                message_text = self._extract_responses_message_text(item)
                if message_text:
                    content_parts.append(message_text)
            elif item_type == "reasoning":
                reasoning_text = self._extract_responses_reasoning_text(item)
                if reasoning_text:
                    reasoning_parts.append(reasoning_text)
                # Capture the full reasoning item for multi-turn continuity.
                # encrypted_content is an opaque blob the API needs back on
                # subsequent turns to maintain coherent reasoning chains.
                encrypted = getattr(item, "encrypted_content", None)
                if isinstance(encrypted, str) and encrypted:
                    raw_item = {"type": "reasoning", "encrypted_content": encrypted}
                    item_id = getattr(item, "id", None)
                    if isinstance(item_id, str) and item_id:
                        raw_item["id"] = item_id
                    # Capture summary — required by the API when replaying reasoning items
                    summary = getattr(item, "summary", None)
                    if isinstance(summary, list):
                        raw_summary = []
                        for part in summary:
                            text = getattr(part, "text", None)
                            if isinstance(text, str):
                                raw_summary.append({"type": "summary_text", "text": text})
                        raw_item["summary"] = raw_summary
                    reasoning_items_raw.append(raw_item)
            elif item_type == "function_call":
                if item_status in {"queued", "in_progress", "incomplete"}:
                    continue
                fn_name = getattr(item, "name", "") or ""
                arguments = getattr(item, "arguments", "{}")
                if not isinstance(arguments, str):
                    arguments = str(arguments)
                raw_call_id = getattr(item, "call_id", None)
                raw_item_id = getattr(item, "id", None)
                embedded_call_id, _ = self._split_responses_tool_id(raw_item_id)
                call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
                if not isinstance(call_id, str) or not call_id.strip():
                    call_id = f"call_{uuid.uuid4().hex[:12]}"
                call_id = call_id.strip()
                response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
                response_item_id = self._derive_responses_function_call_id(call_id, response_item_id)
                tool_calls.append(SimpleNamespace(
                    id=call_id,
                    call_id=call_id,
                    response_item_id=response_item_id,
                    type="function",
                    function=SimpleNamespace(name=fn_name, arguments=arguments),
                ))
            elif item_type == "custom_tool_call":
                fn_name = getattr(item, "name", "") or ""
                arguments = getattr(item, "input", "{}")
                if not isinstance(arguments, str):
                    arguments = str(arguments)
                raw_call_id = getattr(item, "call_id", None)
                raw_item_id = getattr(item, "id", None)
                embedded_call_id, _ = self._split_responses_tool_id(raw_item_id)
                call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
                if not isinstance(call_id, str) or not call_id.strip():
                    call_id = f"call_{uuid.uuid4().hex[:12]}"
                call_id = call_id.strip()
                response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
                response_item_id = self._derive_responses_function_call_id(call_id, response_item_id)
                tool_calls.append(SimpleNamespace(
                    id=call_id,
                    call_id=call_id,
                    response_item_id=response_item_id,
                    type="function",
                    function=SimpleNamespace(name=fn_name, arguments=arguments),
                ))

        final_text = "\n".join([p for p in content_parts if p]).strip()
        if not final_text and hasattr(response, "output_text"):
            out_text = getattr(response, "output_text", "")
            if isinstance(out_text, str):
                final_text = out_text.strip()

        assistant_message = SimpleNamespace(
            content=final_text,
            tool_calls=tool_calls,
            reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None,
            reasoning_content=None,
            reasoning_details=None,
            codex_reasoning_items=reasoning_items_raw or None,
        )

        if tool_calls:
            finish_reason = "tool_calls"
        elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
            finish_reason = "incomplete"
        else:
            finish_reason = "stop"
        return assistant_message, finish_reason

    def _run_codex_stream(self, api_kwargs: dict):
        """Execute one streaming Responses API request and return the final response."""
        max_stream_retries = 1
        for attempt in range(max_stream_retries + 1):
            try:
                with self.client.responses.stream(**api_kwargs) as stream:
                    for _ in stream:
                        pass
                    return stream.get_final_response()
            except RuntimeError as exc:
                err_text = str(exc)
                missing_completed = "response.completed" in err_text
                if missing_completed and attempt < max_stream_retries:
                    logger.debug(
                        "Responses stream closed before completion (attempt %s/%s); retrying.",
                        attempt + 1,
                        max_stream_retries + 1,
                    )
                    continue
                if missing_completed:
                    logger.debug(
                        "Responses stream did not emit response.completed; falling back to create(stream=True)."
                    )
                    return self._run_codex_create_stream_fallback(api_kwargs)
                raise

    def _run_codex_create_stream_fallback(self, api_kwargs: dict):
        """Fallback path for stream completion edge cases on Codex-style Responses backends."""
        fallback_kwargs = dict(api_kwargs)
        fallback_kwargs["stream"] = True
        fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True)
        stream_or_response = self.client.responses.create(**fallback_kwargs)

        # Compatibility shim for mocks or providers that still return a concrete response.
        if hasattr(stream_or_response, "output"):
            return stream_or_response
        if not hasattr(stream_or_response, "__iter__"):
            return stream_or_response

        terminal_response = None
        try:
            for event in stream_or_response:
                event_type = getattr(event, "type", None)
                if not event_type and isinstance(event, dict):
                    event_type = event.get("type")
                if event_type not in {"response.completed", "response.incomplete", "response.failed"}:
                    continue

                terminal_response = getattr(event, "response", None)
                if terminal_response is None and isinstance(event, dict):
                    terminal_response = event.get("response")
                if terminal_response is not None:
                    return terminal_response
        finally:
            close_fn = getattr(stream_or_response, "close", None)
            if callable(close_fn):
                try:
                    close_fn()
                except Exception:
                    pass

        if terminal_response is not None:
            return terminal_response
        raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.")

    def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
        if self.api_mode != "codex_responses" or self.provider != "openai-codex":
            return False

        try:
            from hermes_cli.auth import resolve_codex_runtime_credentials

            creds = resolve_codex_runtime_credentials(force_refresh=force)
        except Exception as exc:
            logger.debug("Codex credential refresh failed: %s", exc)
            return False

        api_key = creds.get("api_key")
        base_url = creds.get("base_url")
        if not isinstance(api_key, str) or not api_key.strip():
            return False
        if not isinstance(base_url, str) or not base_url.strip():
            return False

        self.api_key = api_key.strip()
        self.base_url = base_url.strip().rstrip("/")
        self._client_kwargs["api_key"] = self.api_key
        self._client_kwargs["base_url"] = self.base_url

        try:
            self.client.close()
        except Exception:
            pass

        try:
            self.client = OpenAI(**self._client_kwargs)
        except Exception as exc:
            logger.warning("Failed to rebuild OpenAI client after Codex refresh: %s", exc)
            return False

        return True

    def _try_refresh_nous_client_credentials(self, *, force: bool = True) -> bool:
        if self.api_mode != "chat_completions" or self.provider != "nous":
            return False

        try:
            from hermes_cli.auth import resolve_nous_runtime_credentials

            creds = resolve_nous_runtime_credentials(
                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
                timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
                force_mint=force,
            )
        except Exception as exc:
            logger.debug("Nous credential refresh failed: %s", exc)
            return False

        api_key = creds.get("api_key")
        base_url = creds.get("base_url")
        if not isinstance(api_key, str) or not api_key.strip():
            return False
        if not isinstance(base_url, str) or not base_url.strip():
            return False

        self.api_key = api_key.strip()
        self.base_url = base_url.strip().rstrip("/")
        self._client_kwargs["api_key"] = self.api_key
        self._client_kwargs["base_url"] = self.base_url
        # Nous requests should not inherit OpenRouter-only attribution headers.
        self._client_kwargs.pop("default_headers", None)

        try:
            self.client.close()
        except Exception:
            pass

        try:
            self.client = OpenAI(**self._client_kwargs)
        except Exception as exc:
            logger.warning("Failed to rebuild OpenAI client after Nous refresh: %s", exc)
            return False

        return True

    def _interruptible_api_call(self, api_kwargs: dict):
        """
        Run the API call in a background thread so the main conversation loop
        can detect interrupts without waiting for the full HTTP round-trip.
        
        On interrupt, closes the HTTP client to cancel the in-flight request
        (stops token generation and avoids wasting money), then rebuilds the
        client for future calls.
        """
        result = {"response": None, "error": None}

        def _call():
            try:
                if self.api_mode == "codex_responses":
                    result["response"] = self._run_codex_stream(api_kwargs)
                else:
                    result["response"] = self.client.chat.completions.create(**api_kwargs)
            except Exception as e:
                result["error"] = e

        t = threading.Thread(target=_call, daemon=True)
        t.start()
        while t.is_alive():
            t.join(timeout=0.3)
            if self._interrupt_requested:
                # Force-close the HTTP connection to stop token generation
                try:
                    self.client.close()
                except Exception:
                    pass
                # Rebuild the client for future calls (cheap, no network)
                try:
                    self.client = OpenAI(**self._client_kwargs)
                except Exception:
                    pass
                raise InterruptedError("Agent interrupted during API call")
        if result["error"] is not None:
            raise result["error"]
        return result["response"]

    # ── Provider fallback ──────────────────────────────────────────────────

    # API-key providers: provider name → (default base URL, names of the
    # environment variables that may hold the key, tried in the listed order).
    _FALLBACK_API_KEY_PROVIDERS = {
        "openrouter": (
            OPENROUTER_BASE_URL,
            ["OPENROUTER_API_KEY"],
        ),
        "zai": (
            "https://api.z.ai/api/paas/v4",
            ["ZAI_API_KEY", "Z_AI_API_KEY"],
        ),
        "kimi-coding": (
            "https://api.moonshot.ai/v1",
            ["KIMI_API_KEY"],
        ),
        "minimax": (
            "https://api.minimax.io/v1",
            ["MINIMAX_API_KEY"],
        ),
        "minimax-cn": (
            "https://api.minimaxi.com/v1",
            ["MINIMAX_CN_API_KEY"],
        ),
    }

    # OAuth providers: provider name → (resolver function name, api_mode).
    # The resolver is looked up by name on ``hermes_cli.auth`` and returns
    # {"api_key": ..., "base_url": ...}.
    _FALLBACK_OAUTH_PROVIDERS = {
        "openai-codex": (
            "resolve_codex_runtime_credentials",
            "codex_responses",
        ),
        "nous": (
            "resolve_nous_runtime_credentials",
            "chat_completions",
        ),
    }

    def _resolve_fallback_credentials(
        self, fb_provider: str, fb_config: dict
    ) -> Optional[tuple]:
        """Resolve credentials for a fallback provider.

        Returns (api_key, base_url, api_mode) on success, or None on failure.
        Handles three cases:
          1. OAuth providers (openai-codex, nous) — call credential resolver
          2. API-key providers (openrouter, zai, etc.) — read env var
          3. Custom endpoints — use base_url + api_key_env from config
        """
        # ── 1. OAuth providers ────────────────────────────────────────
        if fb_provider in self._FALLBACK_OAUTH_PROVIDERS:
            resolver_name, api_mode = self._FALLBACK_OAUTH_PROVIDERS[fb_provider]
            try:
                import hermes_cli.auth as _auth
                resolver = getattr(_auth, resolver_name)
                creds = resolver()
                return creds["api_key"], creds["base_url"], api_mode
            except Exception as e:
                logging.warning(
                    "Fallback to %s failed (credential resolution): %s",
                    fb_provider, e,
                )
                return None

        # ── 2. API-key providers ──────────────────────────────────────
        fb_key = (fb_config.get("api_key") or "").strip()
        if not fb_key:
            key_env = (fb_config.get("api_key_env") or "").strip()
            if key_env:
                fb_key = os.getenv(key_env, "")
            elif fb_provider in self._FALLBACK_API_KEY_PROVIDERS:
                for env_var in self._FALLBACK_API_KEY_PROVIDERS[fb_provider][1]:
                    fb_key = os.getenv(env_var, "")
                    if fb_key:
                        break
        if not fb_key:
            logging.warning(
                "Fallback model configured but no API key found for provider '%s'",
                fb_provider,
            )
            return None

        # ── 3. Resolve base URL ───────────────────────────────────────
        fb_base_url = (fb_config.get("base_url") or "").strip()
        if not fb_base_url and fb_provider in self._FALLBACK_API_KEY_PROVIDERS:
            fb_base_url = self._FALLBACK_API_KEY_PROVIDERS[fb_provider][0]
        if not fb_base_url:
            fb_base_url = OPENROUTER_BASE_URL

        return fb_key, fb_base_url, "chat_completions"

    def _try_activate_fallback(self) -> bool:
        """Switch to the configured fallback model/provider.

        Called when the primary model is failing after retries.  Swaps the
        OpenAI client, model slug, and provider in-place so the retry loop
        can continue with the new backend.  One-shot: returns False if
        already activated or not configured.
        """
        if self._fallback_activated or not self._fallback_model:
            return False

        fb = self._fallback_model
        fb_provider = (fb.get("provider") or "").strip().lower()
        fb_model = (fb.get("model") or "").strip()
        if not fb_provider or not fb_model:
            return False

        resolved = self._resolve_fallback_credentials(fb_provider, fb)
        if resolved is None:
            return False
        fb_key, fb_base_url, fb_api_mode = resolved

        # Build new client
        try:
            client_kwargs = {"api_key": fb_key, "base_url": fb_base_url}
            if "openrouter" in fb_base_url.lower():
                client_kwargs["default_headers"] = {
                    "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
                    "X-OpenRouter-Title": "Hermes Agent",
                    "X-OpenRouter-Categories": "productivity,cli-agent",
                }
            elif "api.kimi.com" in fb_base_url.lower():
                client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}

            self.client = OpenAI(**client_kwargs)
            self._client_kwargs = client_kwargs
            old_model = self.model
            self.model = fb_model
            self.provider = fb_provider
            self.base_url = fb_base_url
            self.api_mode = fb_api_mode
            self._fallback_activated = True

            # Re-evaluate prompt caching for the new provider/model
            self._use_prompt_caching = (
                "openrouter" in fb_base_url.lower()
                and "claude" in fb_model.lower()
            )

            print(
                f"{self.log_prefix}🔄 Primary model failed — switching to fallback: "
                f"{fb_model} via {fb_provider}"
            )
            logging.info(
                "Fallback activated: %s → %s (%s)",
                old_model, fb_model, fb_provider,
            )
            return True
        except Exception as e:
            logging.error("Failed to activate fallback model: %s", e)
            return False

    # ── End provider fallback ──────────────────────────────────────────────

    def _build_api_kwargs(self, api_messages: list) -> dict:
        """Build the keyword arguments dict for the active API mode."""
        if self.api_mode == "codex_responses":
            instructions = ""
            payload_messages = api_messages
            if api_messages and api_messages[0].get("role") == "system":
                instructions = str(api_messages[0].get("content") or "").strip()
                payload_messages = api_messages[1:]
            if not instructions:
                instructions = DEFAULT_AGENT_IDENTITY

            # Resolve reasoning effort: config > default (medium)
            reasoning_effort = "medium"
            reasoning_enabled = True
            if self.reasoning_config and isinstance(self.reasoning_config, dict):
                if self.reasoning_config.get("enabled") is False:
                    reasoning_enabled = False
                elif self.reasoning_config.get("effort"):
                    reasoning_effort = self.reasoning_config["effort"]

            kwargs = {
                "model": self.model,
                "instructions": instructions,
                "input": self._chat_messages_to_responses_input(payload_messages),
                "tools": self._responses_tools(),
                "tool_choice": "auto",
                "parallel_tool_calls": True,
                "store": False,
                "prompt_cache_key": self.session_id,
            }

            if reasoning_enabled:
                kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
                kwargs["include"] = ["reasoning.encrypted_content"]
            else:
                kwargs["include"] = []

            if self.max_tokens is not None:
                kwargs["max_output_tokens"] = self.max_tokens

            return kwargs

        provider_preferences = {}
        if self.providers_allowed:
            provider_preferences["only"] = self.providers_allowed
        if self.providers_ignored:
            provider_preferences["ignore"] = self.providers_ignored
        if self.providers_order:
            provider_preferences["order"] = self.providers_order
        if self.provider_sort:
            provider_preferences["sort"] = self.provider_sort
        if self.provider_require_parameters:
            provider_preferences["require_parameters"] = True
        if self.provider_data_collection:
            provider_preferences["data_collection"] = self.provider_data_collection

        api_kwargs = {
            "model": self.model,
            "messages": api_messages,
            "tools": self.tools if self.tools else None,
            "timeout": 900.0,
        }

        if self.max_tokens is not None:
            api_kwargs.update(self._max_tokens_param(self.max_tokens))

        extra_body = {}

        if provider_preferences:
            extra_body["provider"] = provider_preferences

        _is_openrouter = "openrouter" in self.base_url.lower()
        _is_nous = "nousresearch" in self.base_url.lower()

        _is_mistral = "api.mistral.ai" in self.base_url.lower()
        if (_is_openrouter or _is_nous) and not _is_mistral:
            if self.reasoning_config is not None:
                extra_body["reasoning"] = self.reasoning_config
            else:
                extra_body["reasoning"] = {
                    "enabled": True,
                    "effort": "medium"
                }

        # Nous Portal product attribution
        if _is_nous:
            extra_body["tags"] = ["product=hermes-agent"]

        if extra_body:
            api_kwargs["extra_body"] = extra_body

        return api_kwargs

    def _build_assistant_message(self, assistant_message, finish_reason: str) -> dict:
        """Build a normalized assistant message dict from an API response message.

        Handles reasoning extraction, reasoning_details, and optional tool_calls
        so both the tool-call path and the final-response path share one builder.
        """
        reasoning_text = self._extract_reasoning(assistant_message)

        if reasoning_text and self.verbose_logging:
            preview = reasoning_text[:100] + "..." if len(reasoning_text) > 100 else reasoning_text
            logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {preview}")

        if reasoning_text and self.reasoning_callback:
            try:
                self.reasoning_callback(reasoning_text)
            except Exception:
                pass

        msg = {
            "role": "assistant",
            "content": assistant_message.content or "",
            "reasoning": reasoning_text,
            "finish_reason": finish_reason,
        }

        if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
            # Pass reasoning_details back unmodified so providers (OpenRouter,
            # Anthropic, OpenAI) can maintain reasoning continuity across turns.
            # Each provider may include opaque fields (signature, encrypted_content)
            # that must be preserved exactly.
            raw_details = assistant_message.reasoning_details
            preserved = []
            for d in raw_details:
                if isinstance(d, dict):
                    preserved.append(d)
                elif hasattr(d, "__dict__"):
                    preserved.append(d.__dict__)
                elif hasattr(d, "model_dump"):
                    preserved.append(d.model_dump())
            if preserved:
                msg["reasoning_details"] = preserved

        # Codex Responses API: preserve encrypted reasoning items for
        # multi-turn continuity. These get replayed as input on the next turn.
        codex_items = getattr(assistant_message, "codex_reasoning_items", None)
        if codex_items:
            msg["codex_reasoning_items"] = codex_items

        if assistant_message.tool_calls:
            tool_calls = []
            for tool_call in assistant_message.tool_calls:
                raw_id = getattr(tool_call, "id", None)
                call_id = getattr(tool_call, "call_id", None)
                if not isinstance(call_id, str) or not call_id.strip():
                    embedded_call_id, _ = self._split_responses_tool_id(raw_id)
                    call_id = embedded_call_id
                if not isinstance(call_id, str) or not call_id.strip():
                    if isinstance(raw_id, str) and raw_id.strip():
                        call_id = raw_id.strip()
                    else:
                        call_id = f"call_{uuid.uuid4().hex[:12]}"
                call_id = call_id.strip()

                response_item_id = getattr(tool_call, "response_item_id", None)
                if not isinstance(response_item_id, str) or not response_item_id.strip():
                    _, embedded_response_item_id = self._split_responses_tool_id(raw_id)
                    response_item_id = embedded_response_item_id

                response_item_id = self._derive_responses_function_call_id(
                    call_id,
                    response_item_id if isinstance(response_item_id, str) else None,
                )

                tc_dict = {
                    "id": call_id,
                    "call_id": call_id,
                    "response_item_id": response_item_id,
                    "type": tool_call.type,
                    "function": {
                        "name": tool_call.function.name,
                        "arguments": tool_call.function.arguments
                    },
                }
                # Preserve extra_content (e.g. Gemini thought_signature) so it
                # is sent back on subsequent API calls.  Without this, Gemini 3
                # thinking models reject the request with a 400 error.
                extra = getattr(tool_call, "extra_content", None)
                if extra is not None:
                    if hasattr(extra, "model_dump"):
                        extra = extra.model_dump()
                    tc_dict["extra_content"] = extra
                tool_calls.append(tc_dict)
            msg["tool_calls"] = tool_calls

        return msg

    def flush_memories(self, messages: list = None, min_turns: int = None):
        """Give the model one turn to persist memories before context is lost.

        Called before compression, session reset, or CLI exit. Injects a flush
        message, makes one API call, executes any memory tool calls, then
        strips all flush artifacts from the message list.

        Args:
            messages: The current conversation messages. If None, uses
                      self._session_messages (last run_conversation state).
            min_turns: Minimum user turns required to trigger the flush.
                       None = use config value (flush_min_turns).
                       0 = always flush (used for compression).
        """
        if self._memory_flush_min_turns == 0 and min_turns is None:
            return
        if "memory" not in self.valid_tool_names or not self._memory_store:
            return
        effective_min = min_turns if min_turns is not None else self._memory_flush_min_turns
        if self._user_turn_count < effective_min:
            return

        if messages is None:
            messages = getattr(self, '_session_messages', None)
        if not messages or len(messages) < 3:
            return

        flush_content = (
            "[System: The session is being compressed. "
            "Please save anything worth remembering to your memories.]"
        )
        _sentinel = f"__flush_{id(self)}_{time.monotonic()}"
        flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel}
        messages.append(flush_msg)

        try:
            # Build API messages for the flush call
            api_messages = []
            for msg in messages:
                api_msg = msg.copy()
                if msg.get("role") == "assistant":
                    reasoning = msg.get("reasoning")
                    if reasoning:
                        api_msg["reasoning_content"] = reasoning
                api_msg.pop("reasoning", None)
                api_msg.pop("finish_reason", None)
                api_msg.pop("_flush_sentinel", None)
                api_messages.append(api_msg)

            if self._cached_system_prompt:
                api_messages = [{"role": "system", "content": self._cached_system_prompt}] + api_messages

            # Make one API call with only the memory tool available
            memory_tool_def = None
            for t in (self.tools or []):
                if t.get("function", {}).get("name") == "memory":
                    memory_tool_def = t
                    break

            if not memory_tool_def:
                messages.pop()  # remove flush msg
                return

            # Use auxiliary client for the flush call when available --
            # it's cheaper and avoids Codex Responses API incompatibility.
            from agent.auxiliary_client import get_text_auxiliary_client
            aux_client, aux_model = get_text_auxiliary_client()

            if aux_client:
                api_kwargs = {
                    "model": aux_model,
                    "messages": api_messages,
                    "tools": [memory_tool_def],
                    "temperature": 0.3,
                    "max_tokens": 5120,
                }
                response = aux_client.chat.completions.create(**api_kwargs, timeout=30.0)
            elif self.api_mode == "codex_responses":
                # No auxiliary client -- use the Codex Responses path directly
                codex_kwargs = self._build_api_kwargs(api_messages)
                codex_kwargs["tools"] = self._responses_tools([memory_tool_def])
                codex_kwargs["temperature"] = 0.3
                if "max_output_tokens" in codex_kwargs:
                    codex_kwargs["max_output_tokens"] = 5120
                response = self._run_codex_stream(codex_kwargs)
            else:
                api_kwargs = {
                    "model": self.model,
                    "messages": api_messages,
                    "tools": [memory_tool_def],
                    "temperature": 0.3,
                    **self._max_tokens_param(5120),
                }
                response = self.client.chat.completions.create(**api_kwargs, timeout=30.0)

            # Extract tool calls from the response, handling both API formats
            tool_calls = []
            if self.api_mode == "codex_responses" and not aux_client:
                assistant_msg, _ = self._normalize_codex_response(response)
                if assistant_msg and assistant_msg.tool_calls:
                    tool_calls = assistant_msg.tool_calls
            elif hasattr(response, "choices") and response.choices:
                assistant_message = response.choices[0].message
                if assistant_message.tool_calls:
                    tool_calls = assistant_message.tool_calls

            for tc in tool_calls:
                if tc.function.name == "memory":
                    try:
                        args = json.loads(tc.function.arguments)
                        flush_target = args.get("target", "memory")
                        from tools.memory_tool import memory_tool as _memory_tool
                        result = _memory_tool(
                            action=args.get("action"),
                            target=flush_target,
                            content=args.get("content"),
                            old_text=args.get("old_text"),
                            store=self._memory_store,
                        )
                        if self._honcho and flush_target == "user" and args.get("action") == "add":
                            self._honcho_save_user_observation(args.get("content", ""))
                        if not self.quiet_mode:
                            print(f"  🧠 Memory flush: saved to {args.get('target', 'memory')}")
                    except Exception as e:
                        logger.debug("Memory flush tool call failed: %s", e)
        except Exception as e:
            logger.debug("Memory flush API call failed: %s", e)
        finally:
            # Strip flush artifacts: remove everything from the flush message onward.
            # Use sentinel marker instead of identity check for robustness.
            while messages and messages[-1].get("_flush_sentinel") != _sentinel:
                messages.pop()
                if not messages:
                    break
            if messages and messages[-1].get("_flush_sentinel") == _sentinel:
                messages.pop()

    def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default") -> tuple:
        """Compress conversation context and split the session in SQLite.

        Returns:
            (compressed_messages, new_system_prompt) tuple
        """
        # Pre-compression memory flush: let the model save memories before they're lost
        self.flush_memories(messages, min_turns=0)

        compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)

        todo_snapshot = self._todo_store.format_for_injection()
        if todo_snapshot:
            compressed.append({"role": "user", "content": todo_snapshot})

        # Preserve file-read history so the model doesn't re-read files
        # it already examined before compression.
        try:
            from tools.file_tools import get_read_files_summary
            read_files = get_read_files_summary(task_id)
            if read_files:
                file_list = "\n".join(
                    f"  - {f['path']} ({', '.join(f['regions'])})"
                    for f in read_files
                )
                compressed.append({"role": "user", "content": (
                    "[Files already read in this session — do NOT re-read these]\n"
                    f"{file_list}\n"
                    "Use the information from the context summary above. "
                    "Proceed with writing, editing, or responding."
                )})
        except Exception:
            pass  # Don't break compression if file tracking fails

        self._invalidate_system_prompt()
        new_system_prompt = self._build_system_prompt(system_message)
        self._cached_system_prompt = new_system_prompt

        if self._session_db:
            try:
                # Propagate title to the new session with auto-numbering
                old_title = self._session_db.get_session_title(self.session_id)
                self._session_db.end_session(self.session_id, "compression")
                old_session_id = self.session_id
                self.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
                self._session_db.create_session(
                    session_id=self.session_id,
                    source=self.platform or "cli",
                    model=self.model,
                    parent_session_id=old_session_id,
                )
                # Auto-number the title for the continuation session
                if old_title:
                    try:
                        new_title = self._session_db.get_next_title_in_lineage(old_title)
                        self._session_db.set_session_title(self.session_id, new_title)
                    except (ValueError, Exception) as e:
                        logger.debug("Could not propagate title on compression: %s", e)
                self._session_db.update_system_prompt(self.session_id, new_system_prompt)
                # Reset flush cursor — new session starts with no messages written
                self._last_flushed_db_idx = 0
            except Exception as e:
                logger.debug("Session DB compression split failed: %s", e)

        return compressed, new_system_prompt

    def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
        """Execute tool calls from the assistant message and append results to messages."""
        for i, tool_call in enumerate(assistant_message.tool_calls, 1):
            # SAFETY: check interrupt BEFORE starting each tool.
            # If the user sent "stop" during a previous tool's execution,
            # do NOT start any more tools -- skip them all immediately.
            if self._interrupt_requested:
                remaining_calls = assistant_message.tool_calls[i-1:]
                if remaining_calls:
                    print(f"{self.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)")
                for skipped_tc in remaining_calls:
                    skipped_name = skipped_tc.function.name
                    skip_msg = {
                        "role": "tool",
                        "content": f"[Tool execution cancelled — {skipped_name} was skipped due to user interrupt]",
                        "tool_call_id": skipped_tc.id,
                    }
                    messages.append(skip_msg)
                break

            function_name = tool_call.function.name

            # Reset nudge counters when the relevant tool is actually used
            if function_name == "memory":
                self._turns_since_memory = 0
            elif function_name == "skill_manage":
                self._iters_since_skill = 0

            try:
                function_args = json.loads(tool_call.function.arguments)
            except json.JSONDecodeError as e:
                logging.warning(f"Unexpected JSON error after validation: {e}")
                function_args = {}
            if not isinstance(function_args, dict):
                function_args = {}

            if not self.quiet_mode:
                args_str = json.dumps(function_args, ensure_ascii=False)
                args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
                print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")

            if self.tool_progress_callback:
                try:
                    preview = _build_tool_preview(function_name, function_args)
                    self.tool_progress_callback(function_name, preview, function_args)
                except Exception as cb_err:
                    logging.debug(f"Tool progress callback error: {cb_err}")

            # Checkpoint: snapshot working dir before file-mutating tools
            if function_name in ("write_file", "patch") and self._checkpoint_mgr.enabled:
                try:
                    file_path = function_args.get("path", "")
                    if file_path:
                        work_dir = self._checkpoint_mgr.get_working_dir_for_path(file_path)
                        self._checkpoint_mgr.ensure_checkpoint(
                            work_dir, f"before {function_name}"
                        )
                except Exception:
                    pass  # never block tool execution

            tool_start_time = time.time()

            if function_name == "todo":
                from tools.todo_tool import todo_tool as _todo_tool
                function_result = _todo_tool(
                    todos=function_args.get("todos"),
                    merge=function_args.get("merge", False),
                    store=self._todo_store,
                )
                tool_duration = time.time() - tool_start_time
                if self.quiet_mode:
                    print(f"  {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
            elif function_name == "session_search":
                if not self._session_db:
                    function_result = json.dumps({"success": False, "error": "Session database not available."})
                else:
                    from tools.session_search_tool import session_search as _session_search
                    function_result = _session_search(
                        query=function_args.get("query", ""),
                        role_filter=function_args.get("role_filter"),
                        limit=function_args.get("limit", 3),
                        db=self._session_db,
                        current_session_id=self.session_id,
                    )
                tool_duration = time.time() - tool_start_time
                if self.quiet_mode:
                    print(f"  {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
            elif function_name == "memory":
                target = function_args.get("target", "memory")
                from tools.memory_tool import memory_tool as _memory_tool
                function_result = _memory_tool(
                    action=function_args.get("action"),
                    target=target,
                    content=function_args.get("content"),
                    old_text=function_args.get("old_text"),
                    store=self._memory_store,
                )
                # Also send user observations to Honcho when active
                if self._honcho and target == "user" and function_args.get("action") == "add":
                    self._honcho_save_user_observation(function_args.get("content", ""))
                tool_duration = time.time() - tool_start_time
                if self.quiet_mode:
                    print(f"  {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
            elif function_name == "clarify":
                from tools.clarify_tool import clarify_tool as _clarify_tool
                function_result = _clarify_tool(
                    question=function_args.get("question", ""),
                    choices=function_args.get("choices"),
                    callback=self.clarify_callback,
                )
                tool_duration = time.time() - tool_start_time
                if self.quiet_mode:
                    print(f"  {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
            elif function_name == "delegate_task":
                from tools.delegate_tool import delegate_task as _delegate_task
                tasks_arg = function_args.get("tasks")
                if tasks_arg and isinstance(tasks_arg, list):
                    spinner_label = f"🔀 delegating {len(tasks_arg)} tasks"
                else:
                    goal_preview = (function_args.get("goal") or "")[:30]
                    spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating"
                spinner = None
                if self.quiet_mode:
                    face = random.choice(KawaiiSpinner.KAWAII_WAITING)
                    spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots')
                    spinner.start()
                self._delegate_spinner = spinner
                _delegate_result = None
                try:
                    function_result = _delegate_task(
                        goal=function_args.get("goal"),
                        context=function_args.get("context"),
                        toolsets=function_args.get("toolsets"),
                        tasks=tasks_arg,
                        max_iterations=function_args.get("max_iterations"),
                        parent_agent=self,
                    )
                    _delegate_result = function_result
                finally:
                    self._delegate_spinner = None
                    tool_duration = time.time() - tool_start_time
                    cute_msg = _get_cute_tool_message_impl('delegate_task', function_args, tool_duration, result=_delegate_result)
                    if spinner:
                        spinner.stop(cute_msg)
                    elif self.quiet_mode:
                        print(f"  {cute_msg}")
            elif self.quiet_mode:
                face = random.choice(KawaiiSpinner.KAWAII_WAITING)
                tool_emoji_map = {
                    'web_search': '🔍', 'web_extract': '📄', 'web_crawl': '🕸️',
                    'terminal': '💻', 'process': '⚙️',
                    'read_file': '📖', 'write_file': '✍️', 'patch': '🔧', 'search_files': '🔎',
                    'browser_navigate': '🌐', 'browser_snapshot': '📸',
                    'browser_click': '👆', 'browser_type': '⌨️',
                    'browser_scroll': '📜', 'browser_back': '◀️',
                    'browser_press': '⌨️', 'browser_close': '🚪',
                    'browser_get_images': '🖼️', 'browser_vision': '👁️',
                    'image_generate': '🎨', 'text_to_speech': '🔊',
                    'vision_analyze': '👁️', 'mixture_of_agents': '🧠',
                    'skills_list': '📚', 'skill_view': '📚',
                    'schedule_cronjob': '⏰', 'list_cronjobs': '⏰', 'remove_cronjob': '⏰',
                    'send_message': '📨', 'todo': '📋', 'memory': '🧠', 'session_search': '🔍',
                    'clarify': '❓', 'execute_code': '🐍', 'delegate_task': '🔀',
                }
                emoji = tool_emoji_map.get(function_name, '⚡')
                preview = _build_tool_preview(function_name, function_args) or function_name
                if len(preview) > 30:
                    preview = preview[:27] + "..."
                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots')
                spinner.start()
                _spinner_result = None
                try:
                    function_result = handle_function_call(
                        function_name, function_args, effective_task_id,
                        enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None,
                    )
                    _spinner_result = function_result
                except Exception as tool_error:
                    function_result = f"Error executing tool '{function_name}': {tool_error}"
                    logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
                finally:
                    tool_duration = time.time() - tool_start_time
                    cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result)
                    spinner.stop(cute_msg)
            else:
                try:
                    function_result = handle_function_call(
                        function_name, function_args, effective_task_id,
                        enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None,
                    )
                except Exception as tool_error:
                    function_result = f"Error executing tool '{function_name}': {tool_error}"
                    logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
                tool_duration = time.time() - tool_start_time

            result_preview = function_result[:200] if len(function_result) > 200 else function_result

            # Log tool errors to the persistent error log so [error] tags
            # in the UI always have a corresponding detailed entry on disk.
            _is_error_result, _ = _detect_tool_failure(function_name, function_result)
            if _is_error_result:
                logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)

            if self.verbose_logging:
                logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
                logging.debug(f"Tool result preview: {result_preview}...")

            # Guard against tools returning absurdly large content that would
            # blow up the context window. 100K chars ≈ 25K tokens — generous
            # enough for any reasonable tool output but prevents catastrophic
            # context explosions (e.g. accidental base64 image dumps).
            MAX_TOOL_RESULT_CHARS = 100_000
            if len(function_result) > MAX_TOOL_RESULT_CHARS:
                original_len = len(function_result)
                function_result = (
                    function_result[:MAX_TOOL_RESULT_CHARS]
                    + f"\n\n[Truncated: tool response was {original_len:,} chars, "
                    f"exceeding the {MAX_TOOL_RESULT_CHARS:,} char limit]"
                )

            tool_msg = {
                "role": "tool",
                "content": function_result,
                "tool_call_id": tool_call.id
            }
            messages.append(tool_msg)

            if not self.quiet_mode:
                response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
                print(f"  ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}")

            if self._interrupt_requested and i < len(assistant_message.tool_calls):
                remaining = len(assistant_message.tool_calls) - i
                print(f"{self.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)")
                for skipped_tc in assistant_message.tool_calls[i:]:
                    skipped_name = skipped_tc.function.name
                    skip_msg = {
                        "role": "tool",
                        "content": f"[Tool execution skipped — {skipped_name} was not started. User sent a new message]",
                        "tool_call_id": skipped_tc.id
                    }
                    messages.append(skip_msg)
                break

            if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
                time.sleep(self.tool_delay)

        # ── Budget pressure injection ─────────────────────────────────
        # After all tool calls in this turn are processed, check if we're
        # approaching max_iterations. If so, inject a warning into the LAST
        # tool result's JSON so the LLM sees it naturally when reading results.
        budget_warning = self._get_budget_warning(api_call_count)
        if budget_warning and messages and messages[-1].get("role") == "tool":
            last_content = messages[-1]["content"]
            try:
                parsed = json.loads(last_content)
                if isinstance(parsed, dict):
                    parsed["_budget_warning"] = budget_warning
                    messages[-1]["content"] = json.dumps(parsed, ensure_ascii=False)
                else:
                    messages[-1]["content"] = last_content + f"\n\n{budget_warning}"
            except (json.JSONDecodeError, TypeError):
                messages[-1]["content"] = last_content + f"\n\n{budget_warning}"
            if not self.quiet_mode:
                remaining = self.max_iterations - api_call_count
                tier = "⚠️  WARNING" if remaining <= self.max_iterations * 0.1 else "💡 CAUTION"
                print(f"{self.log_prefix}{tier}: {remaining} iterations remaining")

    def _get_budget_warning(self, api_call_count: int) -> Optional[str]:
        """Return a budget pressure string, or None if not yet needed.

        Two-tier system:
          - Caution (70%): nudge to consolidate work
          - Warning (90%): urgent, must respond now
        """
        if not self._budget_pressure_enabled or self.max_iterations <= 0:
            return None
        progress = api_call_count / self.max_iterations
        remaining = self.max_iterations - api_call_count
        if progress >= self._budget_warning_threshold:
            return (
                f"[BUDGET WARNING: Iteration {api_call_count}/{self.max_iterations}. "
                f"Only {remaining} iteration(s) left. "
                "Provide your final response NOW. No more tool calls unless absolutely critical.]"
            )
        if progress >= self._budget_caution_threshold:
            return (
                f"[BUDGET: Iteration {api_call_count}/{self.max_iterations}. "
                f"{remaining} iterations left. Start consolidating your work.]"
            )
        return None

    def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
        """Request a summary when max iterations are reached. Returns the final response text."""
        print(f"⚠️  Reached maximum iterations ({self.max_iterations}). Requesting summary...")

        summary_request = (
            "You've reached the maximum number of tool-calling iterations allowed. "
            "Please provide a final response summarizing what you've found and accomplished so far, "
            "without calling any more tools."
        )
        messages.append({"role": "user", "content": summary_request})

        try:
            # Build API messages, stripping internal-only fields
            # (finish_reason, reasoning) that strict APIs like Mistral reject with 422
            api_messages = []
            for msg in messages:
                api_msg = msg.copy()
                for internal_field in ("reasoning", "finish_reason"):
                    api_msg.pop(internal_field, None)
                api_messages.append(api_msg)

            effective_system = self._cached_system_prompt or ""
            if self.ephemeral_system_prompt:
                effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
            if effective_system:
                api_messages = [{"role": "system", "content": effective_system}] + api_messages
            if self.prefill_messages:
                sys_offset = 1 if effective_system else 0
                for idx, pfm in enumerate(self.prefill_messages):
                    api_messages.insert(sys_offset + idx, pfm.copy())

            summary_extra_body = {}
            _is_openrouter = "openrouter" in self.base_url.lower()
            _is_nous = "nousresearch" in self.base_url.lower()
            if _is_openrouter or _is_nous:
                if self.reasoning_config is not None:
                    summary_extra_body["reasoning"] = self.reasoning_config
                else:
                    summary_extra_body["reasoning"] = {
                        "enabled": True,
                        "effort": "medium"
                    }
            if _is_nous:
                summary_extra_body["tags"] = ["product=hermes-agent"]

            if self.api_mode == "codex_responses":
                codex_kwargs = self._build_api_kwargs(api_messages)
                codex_kwargs.pop("tools", None)
                summary_response = self._run_codex_stream(codex_kwargs)
                assistant_message, _ = self._normalize_codex_response(summary_response)
                final_response = (assistant_message.content or "").strip() if assistant_message else ""
            else:
                summary_kwargs = {
                    "model": self.model,
                    "messages": api_messages,
                }
                if self.max_tokens is not None:
                    summary_kwargs.update(self._max_tokens_param(self.max_tokens))

                # Include provider routing preferences
                provider_preferences = {}
                if self.providers_allowed:
                    provider_preferences["only"] = self.providers_allowed
                if self.providers_ignored:
                    provider_preferences["ignore"] = self.providers_ignored
                if self.providers_order:
                    provider_preferences["order"] = self.providers_order
                if self.provider_sort:
                    provider_preferences["sort"] = self.provider_sort
                if provider_preferences:
                    summary_extra_body["provider"] = provider_preferences

                if summary_extra_body:
                    summary_kwargs["extra_body"] = summary_extra_body

                summary_response = self.client.chat.completions.create(**summary_kwargs)

                if summary_response.choices and summary_response.choices[0].message.content:
                    final_response = summary_response.choices[0].message.content
                else:
                    final_response = ""

            if final_response:
                if "<think>" in final_response:
                    final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
                if final_response:
                    messages.append({"role": "assistant", "content": final_response})
                else:
                    final_response = "I reached the iteration limit and couldn't generate a summary."
            else:
                # Retry summary generation
                if self.api_mode == "codex_responses":
                    codex_kwargs = self._build_api_kwargs(api_messages)
                    codex_kwargs.pop("tools", None)
                    retry_response = self._run_codex_stream(codex_kwargs)
                    retry_msg, _ = self._normalize_codex_response(retry_response)
                    final_response = (retry_msg.content or "").strip() if retry_msg else ""
                else:
                    summary_kwargs = {
                        "model": self.model,
                        "messages": api_messages,
                    }
                    if self.max_tokens is not None:
                        summary_kwargs.update(self._max_tokens_param(self.max_tokens))
                    if summary_extra_body:
                        summary_kwargs["extra_body"] = summary_extra_body

                    summary_response = self.client.chat.completions.create(**summary_kwargs)

                    if summary_response.choices and summary_response.choices[0].message.content:
                        final_response = summary_response.choices[0].message.content
                    else:
                        final_response = ""

                if final_response:
                    if "<think>" in final_response:
                        final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
                    if final_response:
                        messages.append({"role": "assistant", "content": final_response})
                    else:
                        final_response = "I reached the iteration limit and couldn't generate a summary."
                else:
                    final_response = "I reached the iteration limit and couldn't generate a summary."

        except Exception as e:
            logging.warning(f"Failed to get summary response: {e}")
            final_response = f"I reached the maximum iterations ({self.max_iterations}) but couldn't summarize. Error: {str(e)}"

        return final_response

    def run_conversation(
        self,
        user_message: str,
        system_message: str = None,
        conversation_history: List[Dict[str, Any]] = None,
        task_id: str = None
    ) -> Dict[str, Any]:
        """
        Run a complete conversation with tool calling until completion.

        Args:
            user_message (str): The user's message/question
            system_message (str): Custom system message (optional, overrides ephemeral_system_prompt if provided)
            conversation_history (List[Dict]): Previous conversation messages (optional)
            task_id (str): Unique identifier for this task to isolate VMs between concurrent tasks (optional, auto-generated if not provided)

        Returns:
            Dict: Complete conversation result with final response and message history
        """
        # Generate unique task_id if not provided to isolate VMs between concurrent tasks
        effective_task_id = task_id or str(uuid.uuid4())
        
        # Reset retry counters and iteration budget at the start of each turn
        # so subagent usage from a previous turn doesn't eat into the next one.
        self._invalid_tool_retries = 0
        self._invalid_json_retries = 0
        self._empty_content_retries = 0
        self._incomplete_scratchpad_retries = 0
        self._codex_incomplete_retries = 0
        self._last_content_with_tools = None
        self._turns_since_memory = 0
        self._iters_since_skill = 0
        self.iteration_budget = IterationBudget(self.max_iterations)
        
        # Initialize conversation (copy to avoid mutating the caller's list)
        messages = list(conversation_history) if conversation_history else []
        
        # Hydrate todo store from conversation history (gateway creates a fresh
        # AIAgent per message, so the in-memory store is empty -- we need to
        # recover the todo state from the most recent todo tool response in history)
        if conversation_history and not self._todo_store.has_items():
            self._hydrate_todo_store(conversation_history)
        
        # Prefill messages (few-shot priming) are injected at API-call time only,
        # never stored in the messages list. This keeps them ephemeral: they won't
        # be saved to session DB, session logs, or batch trajectories, but they're
        # automatically re-applied on every API call (including session continuations).
        
        # Track user turns for memory flush and periodic nudge logic
        self._user_turn_count += 1

        # Preserve the original user message before nudge injection.
        # Honcho should receive the actual user input, not system nudges.
        original_user_message = user_message

        # Periodic memory nudge: remind the model to consider saving memories.
        # Counter resets whenever the memory tool is actually used.
        if (self._memory_nudge_interval > 0
                and "memory" in self.valid_tool_names
                and self._memory_store):
            self._turns_since_memory += 1
            if self._turns_since_memory >= self._memory_nudge_interval:
                user_message += (
                    "\n\n[System: You've had several exchanges in this session. "
                    "Consider whether there's anything worth saving to your memories.]"
                )
                self._turns_since_memory = 0

        # Skill creation nudge: fires on the first user message after a long tool loop.
        # The counter increments per API iteration in the tool loop and is checked here.
        if (self._skill_nudge_interval > 0
                and self._iters_since_skill >= self._skill_nudge_interval
                and "skill_manage" in self.valid_tool_names):
            user_message += (
                "\n\n[System: The previous task involved many steps. "
                "If you discovered a reusable workflow, consider saving it as a skill.]"
            )
            self._iters_since_skill = 0

        # Honcho prefetch: retrieve user context for system prompt injection.
        # Only on the FIRST turn of a session (empty history).  On subsequent
        # turns the model already has all prior context in its conversation
        # history, and the Honcho context is baked into the stored system
        # prompt — re-fetching it would change the system message and break
        # Anthropic prompt caching.
        self._honcho_context = ""
        if self._honcho and self._honcho_session_key and not conversation_history:
            try:
                self._honcho_context = self._honcho_prefetch(user_message)
            except Exception as e:
                logger.debug("Honcho prefetch failed (non-fatal): %s", e)

        # Add user message
        user_msg = {"role": "user", "content": user_message}
        messages.append(user_msg)
        
        if not self.quiet_mode:
            print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
        
        # ── System prompt (cached per session for prefix caching) ──
        # Built once on first call, reused for all subsequent calls.
        # Only rebuilt after context compression events (which invalidate
        # the cache and reload memory from disk).
        #
        # For continuing sessions (gateway creates a fresh AIAgent per
        # message), we load the stored system prompt from the session DB
        # instead of rebuilding.  Rebuilding would pick up memory changes
        # from disk that the model already knows about (it wrote them!),
        # producing a different system prompt and breaking the Anthropic
        # prefix cache.
        if self._cached_system_prompt is None:
            stored_prompt = None
            if conversation_history and self._session_db:
                try:
                    session_row = self._session_db.get_session(self.session_id)
                    if session_row:
                        stored_prompt = session_row.get("system_prompt") or None
                except Exception:
                    pass  # Fall through to build fresh

            if stored_prompt:
                # Continuing session — reuse the exact system prompt from
                # the previous turn so the Anthropic cache prefix matches.
                self._cached_system_prompt = stored_prompt
            else:
                # First turn of a new session — build from scratch.
                self._cached_system_prompt = self._build_system_prompt(system_message)
                # Bake Honcho context into the prompt so it's stable for
                # the entire session (not re-fetched per turn).
                if self._honcho_context:
                    self._cached_system_prompt = (
                        self._cached_system_prompt + "\n\n" + self._honcho_context
                    ).strip()
                # Store the system prompt snapshot in SQLite
                if self._session_db:
                    try:
                        self._session_db.update_system_prompt(self.session_id, self._cached_system_prompt)
                    except Exception as e:
                        logger.debug("Session DB update_system_prompt failed: %s", e)

        active_system_prompt = self._cached_system_prompt

        # ── Preflight context compression ──
        # Before entering the main loop, check if the loaded conversation
        # history already exceeds the model's context threshold.  This handles
        # cases where a user switches to a model with a smaller context window
        # while having a large existing session — compress proactively rather
        # than waiting for an API error (which might be caught as a non-retryable
        # 4xx and abort the request entirely).
        if (
            self.compression_enabled
            and len(messages) > self.context_compressor.protect_first_n
                                + self.context_compressor.protect_last_n + 1
        ):
            _sys_tok_est = estimate_tokens_rough(active_system_prompt or "")
            _msg_tok_est = estimate_messages_tokens_rough(messages)
            _preflight_tokens = _sys_tok_est + _msg_tok_est

            if _preflight_tokens >= self.context_compressor.threshold_tokens:
                logger.info(
                    "Preflight compression: ~%s tokens >= %s threshold (model %s, ctx %s)",
                    f"{_preflight_tokens:,}",
                    f"{self.context_compressor.threshold_tokens:,}",
                    self.model,
                    f"{self.context_compressor.context_length:,}",
                )
                if not self.quiet_mode:
                    print(
                        f"📦 Preflight compression: ~{_preflight_tokens:,} tokens "
                        f">= {self.context_compressor.threshold_tokens:,} threshold"
                    )
                # May need multiple passes for very large sessions with small
                # context windows (each pass summarises the middle N turns).
                for _pass in range(3):
                    _orig_len = len(messages)
                    messages, active_system_prompt = self._compress_context(
                        messages, system_message, approx_tokens=_preflight_tokens,
                        task_id=effective_task_id,
                    )
                    if len(messages) >= _orig_len:
                        break  # Cannot compress further
                    # Re-estimate after compression
                    _sys_tok_est = estimate_tokens_rough(active_system_prompt or "")
                    _msg_tok_est = estimate_messages_tokens_rough(messages)
                    _preflight_tokens = _sys_tok_est + _msg_tok_est
                    if _preflight_tokens < self.context_compressor.threshold_tokens:
                        break  # Under threshold

        # Main conversation loop
        api_call_count = 0
        final_response = None
        interrupted = False
        codex_ack_continuations = 0
        length_continue_retries = 0
        truncated_response_prefix = ""
        
        # Clear any stale interrupt state at start
        self.clear_interrupt()
        
        while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
            # Reset per-turn checkpoint dedup so each iteration can take one snapshot
            self._checkpoint_mgr.new_turn()

            # Check for interrupt request (e.g., user sent new message)
            if self._interrupt_requested:
                interrupted = True
                if not self.quiet_mode:
                    print(f"\n⚡ Breaking out of tool loop due to interrupt...")
                break
            
            api_call_count += 1
            if not self.iteration_budget.consume():
                if not self.quiet_mode:
                    print(f"\n⚠️  Session iteration budget exhausted ({self.iteration_budget.max_total} total across agent + subagents)")
                break

            # Fire step_callback for gateway hooks (agent:step event)
            if self.step_callback is not None:
                try:
                    prev_tools = []
                    for _m in reversed(messages):
                        if _m.get("role") == "assistant" and _m.get("tool_calls"):
                            prev_tools = [
                                tc["function"]["name"]
                                for tc in _m["tool_calls"]
                                if isinstance(tc, dict)
                            ]
                            break
                    self.step_callback(api_call_count, prev_tools)
                except Exception as _step_err:
                    logger.debug("step_callback error (iteration %s): %s", api_call_count, _step_err)

            # Track tool-calling iterations for skill nudge.
            # Counter resets whenever skill_manage is actually used.
            if (self._skill_nudge_interval > 0
                    and "skill_manage" in self.valid_tool_names):
                self._iters_since_skill += 1
            
            # Prepare messages for API call
            # If we have an ephemeral system prompt, prepend it to the messages
            # Note: Reasoning is embedded in content via <think> tags for trajectory storage.
            # However, providers like Moonshot AI require a separate 'reasoning_content' field
            # on assistant messages with tool_calls. We handle both cases here.
            api_messages = []
            for msg in messages:
                api_msg = msg.copy()

                # For ALL assistant messages, pass reasoning back to the API
                # This ensures multi-turn reasoning context is preserved
                if msg.get("role") == "assistant":
                    reasoning_text = msg.get("reasoning")
                    if reasoning_text:
                        # Add reasoning_content for API compatibility (Moonshot AI, Novita, OpenRouter)
                        api_msg["reasoning_content"] = reasoning_text

                # Remove 'reasoning' field - it's for trajectory storage only
                # We've copied it to 'reasoning_content' for the API above
                if "reasoning" in api_msg:
                    api_msg.pop("reasoning")
                # Remove finish_reason - not accepted by strict APIs (e.g. Mistral)
                if "finish_reason" in api_msg:
                    api_msg.pop("finish_reason")
                # Keep 'reasoning_details' - OpenRouter uses this for multi-turn reasoning context
                # The signature field helps maintain reasoning continuity
                api_messages.append(api_msg)

            # Build the final system message: cached prompt + ephemeral system prompt.
            # The ephemeral part is appended here (not baked into the cached prompt)
            # so it stays out of the session DB and logs.
            # Note: Honcho context is baked into _cached_system_prompt on the first
            # turn and stored in the session DB, so it does NOT need to be injected
            # here.  This keeps the system message identical across all turns in a
            # session, maximizing Anthropic prompt cache hits.
            effective_system = active_system_prompt or ""
            if self.ephemeral_system_prompt:
                effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
            if effective_system:
                api_messages = [{"role": "system", "content": effective_system}] + api_messages

            # Inject ephemeral prefill messages right after the system prompt
            # but before conversation history. Same API-call-time-only pattern.
            if self.prefill_messages:
                sys_offset = 1 if effective_system else 0
                for idx, pfm in enumerate(self.prefill_messages):
                    api_messages.insert(sys_offset + idx, pfm.copy())

            # Apply Anthropic prompt caching for Claude models via OpenRouter.
            # Auto-detected: if model name contains "claude" and base_url is OpenRouter,
            # inject cache_control breakpoints (system + last 3 messages) to reduce
            # input token costs by ~75% on multi-turn conversations.
            if self._use_prompt_caching:
                api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl)

            # Safety net: strip orphaned tool results / add stubs for missing
            # results before sending to the API.  The compressor handles this
            # during compression, but orphans can also sneak in from session
            # loading or manual message manipulation.
            if hasattr(self, 'context_compressor') and self.context_compressor:
                api_messages = self.context_compressor._sanitize_tool_pairs(api_messages)

            # Calculate approximate request size for logging
            total_chars = sum(len(str(msg)) for msg in api_messages)
            approx_tokens = total_chars // 4  # Rough estimate: 4 chars per token
            
            # Thinking spinner for quiet mode (animated during API call)
            thinking_spinner = None
            
            if not self.quiet_mode:
                print(f"\n{self.log_prefix}🔄 Making API call #{api_call_count}/{self.max_iterations}...")
                print(f"{self.log_prefix}   📊 Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)")
                print(f"{self.log_prefix}   🔧 Available tools: {len(self.tools) if self.tools else 0}")
            else:
                # Animated thinking spinner in quiet mode
                face = random.choice(KawaiiSpinner.KAWAII_THINKING)
                verb = random.choice(KawaiiSpinner.THINKING_VERBS)
                if self.thinking_callback:
                    # CLI TUI mode: use prompt_toolkit widget instead of raw spinner
                    self.thinking_callback(f"{face} {verb}...")
                else:
                    spinner_type = random.choice(['brain', 'sparkle', 'pulse', 'moon', 'star'])
                    thinking_spinner = KawaiiSpinner(f"{face} {verb}...", spinner_type=spinner_type)
                    thinking_spinner.start()
            
            # Log request details if verbose
            if self.verbose_logging:
                logging.debug(f"API Request - Model: {self.model}, Messages: {len(messages)}, Tools: {len(self.tools) if self.tools else 0}")
                logging.debug(f"Last message role: {messages[-1]['role'] if messages else 'none'}")
                logging.debug(f"Total message size: ~{approx_tokens:,} tokens")
            
            api_start_time = time.time()
            retry_count = 0
            max_retries = 3
            compression_attempts = 0
            max_compression_attempts = 3
            codex_auth_retry_attempted = False
            nous_auth_retry_attempted = False
            restart_with_compressed_messages = False
            restart_with_length_continuation = False

            finish_reason = "stop"
            response = None  # Guard against UnboundLocalError if all retries fail

            while retry_count < max_retries:
                try:
                    api_kwargs = self._build_api_kwargs(api_messages)
                    if self.api_mode == "codex_responses":
                        api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False)

                    if os.getenv("HERMES_DUMP_REQUESTS", "").strip().lower() in {"1", "true", "yes", "on"}:
                        self._dump_api_request_debug(api_kwargs, reason="preflight")

                    response = self._interruptible_api_call(api_kwargs)
                    
                    api_duration = time.time() - api_start_time
                    
                    # Stop thinking spinner silently -- the response box or tool
                    # execution messages that follow are more informative.
                    if thinking_spinner:
                        thinking_spinner.stop("")
                        thinking_spinner = None
                    if self.thinking_callback:
                        self.thinking_callback("")
                    
                    if not self.quiet_mode:
                        print(f"{self.log_prefix}⏱️  API call completed in {api_duration:.2f}s")
                    
                    if self.verbose_logging:
                        # Log response with provider info if available
                        resp_model = getattr(response, 'model', 'N/A') if response else 'N/A'
                        logging.debug(f"API Response received - Model: {resp_model}, Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
                    
                    # Validate response shape before proceeding
                    response_invalid = False
                    error_details = []
                    if self.api_mode == "codex_responses":
                        output_items = getattr(response, "output", None) if response is not None else None
                        if response is None:
                            response_invalid = True
                            error_details.append("response is None")
                        elif not isinstance(output_items, list):
                            response_invalid = True
                            error_details.append("response.output is not a list")
                        elif len(output_items) == 0:
                            response_invalid = True
                            error_details.append("response.output is empty")
                    else:
                        if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0:
                            response_invalid = True
                            if response is None:
                                error_details.append("response is None")
                            elif not hasattr(response, 'choices'):
                                error_details.append("response has no 'choices' attribute")
                            elif response.choices is None:
                                error_details.append("response.choices is None")
                            else:
                                error_details.append("response.choices is empty")

                    if response_invalid:
                        # Stop spinner before printing error messages
                        if thinking_spinner:
                            thinking_spinner.stop(f"(´;ω;`) oops, retrying...")
                            thinking_spinner = None
                        if self.thinking_callback:
                            self.thinking_callback("")
                        
                        # This is often rate limiting or provider returning malformed response
                        retry_count += 1
                        
                        # Check for error field in response (some providers include this)
                        error_msg = "Unknown"
                        provider_name = "Unknown"
                        if response and hasattr(response, 'error') and response.error:
                            error_msg = str(response.error)
                            # Try to extract provider from error metadata
                            if hasattr(response.error, 'metadata') and response.error.metadata:
                                provider_name = response.error.metadata.get('provider_name', 'Unknown')
                        elif response and hasattr(response, 'message') and response.message:
                            error_msg = str(response.message)
                        
                        # Try to get provider from model field (OpenRouter often returns actual model used)
                        if provider_name == "Unknown" and response and hasattr(response, 'model') and response.model:
                            provider_name = f"model={response.model}"
                        
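                        # Provider still unidentified after checking error metadata and the model field:
                        # fall back to inspecting the raw response object (debug aid only, no header lookup)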
                        if provider_name == "Unknown" and response:
                            # Log all response attributes for debugging
                            resp_attrs = {k: str(v)[:100] for k, v in vars(response).items() if not k.startswith('_')}
                            if self.verbose_logging:
                                logging.debug(f"Response attributes for invalid response: {resp_attrs}")
                        
                        print(f"{self.log_prefix}⚠️  Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}")
                        print(f"{self.log_prefix}   🏢 Provider: {provider_name}")
                        print(f"{self.log_prefix}   📝 Provider message: {error_msg[:200]}")
                        print(f"{self.log_prefix}   ⏱️  Response time: {api_duration:.2f}s (fast response often indicates rate limiting)")
                        
                        if retry_count >= max_retries:
                            # Try fallback before giving up
                            if self._try_activate_fallback():
                                retry_count = 0
                                continue
                            print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
                            logging.error(f"{self.log_prefix}Invalid API response after {max_retries} retries.")
                            self._persist_session(messages, conversation_history)
                            return {
                                "messages": messages,
                                "completed": False,
                                "api_calls": api_call_count,
                                "error": "Invalid API response shape. Likely rate limited or malformed provider response.",
                                "failed": True  # Mark as failure for filtering
                            }
                        
                        # Longer backoff for rate limiting (likely cause of None choices)
                        wait_time = min(5 * (2 ** (retry_count - 1)), 120)  # 5s, 10s, 20s, 40s, 80s, 120s
                        print(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...")
                        logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
                        
                        # Sleep in small increments to stay responsive to interrupts
                        sleep_end = time.time() + wait_time
                        while time.time() < sleep_end:
                            if self._interrupt_requested:
                                print(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
                                self._persist_session(messages, conversation_history)
                                self.clear_interrupt()
                                return {
                                    "final_response": f"Operation interrupted: retrying API call after rate limit (retry {retry_count}/{max_retries}).",
                                    "messages": messages,
                                    "api_calls": api_call_count,
                                    "completed": False,
                                    "interrupted": True,
                                }
                            time.sleep(0.2)
                        continue  # Retry the API call

                    # Check finish_reason before proceeding
                    if self.api_mode == "codex_responses":
                        status = getattr(response, "status", None)
                        incomplete_details = getattr(response, "incomplete_details", None)
                        incomplete_reason = None
                        if isinstance(incomplete_details, dict):
                            incomplete_reason = incomplete_details.get("reason")
                        else:
                            incomplete_reason = getattr(incomplete_details, "reason", None)
                        if status == "incomplete" and incomplete_reason in {"max_output_tokens", "length"}:
                            finish_reason = "length"
                        else:
                            finish_reason = "stop"
                    else:
                        finish_reason = response.choices[0].finish_reason

                    if finish_reason == "length":
                        print(f"{self.log_prefix}⚠️  Response truncated (finish_reason='length') - model hit max output tokens")

                        if self.api_mode == "chat_completions":
                            assistant_message = response.choices[0].message
                            if not assistant_message.tool_calls:
                                length_continue_retries += 1
                                interim_msg = self._build_assistant_message(assistant_message, finish_reason)
                                messages.append(interim_msg)
                                if assistant_message.content:
                                    truncated_response_prefix += assistant_message.content

                                if length_continue_retries < 3:
                                    print(
                                        f"{self.log_prefix}↻ Requesting continuation "
                                        f"({length_continue_retries}/3)..."
                                    )
                                    continue_msg = {
                                        "role": "user",
                                        "content": (
                                            "[System: Your previous response was truncated by the output "
                                            "length limit. Continue exactly where you left off. Do not "
                                            "restart or repeat prior text. Finish the answer directly.]"
                                        ),
                                    }
                                    messages.append(continue_msg)
                                    self._session_messages = messages
                                    self._save_session_log(messages)
                                    restart_with_length_continuation = True
                                    break

                                partial_response = self._strip_think_blocks(truncated_response_prefix).strip()
                                self._cleanup_task_resources(effective_task_id)
                                self._persist_session(messages, conversation_history)
                                return {
                                    "final_response": partial_response or None,
                                    "messages": messages,
                                    "api_calls": api_call_count,
                                    "completed": False,
                                    "partial": True,
                                    "error": "Response remained truncated after 3 continuation attempts",
                                }

                        # If we have prior messages, roll back to last complete state
                        if len(messages) > 1:
                            print(f"{self.log_prefix}   ⏪ Rolling back to last complete assistant turn")
                            rolled_back_messages = self._get_messages_up_to_last_assistant(messages)

                            self._cleanup_task_resources(effective_task_id)
                            self._persist_session(messages, conversation_history)

                            return {
                                "final_response": None,
                                "messages": rolled_back_messages,
                                "api_calls": api_call_count,
                                "completed": False,
                                "partial": True,
                                "error": "Response truncated due to output length limit"
                            }
                        else:
                            # First message was truncated - mark as failed
                            print(f"{self.log_prefix}❌ First response truncated - cannot recover")
                            self._persist_session(messages, conversation_history)
                            return {
                                "final_response": None,
                                "messages": messages,
                                "api_calls": api_call_count,
                                "completed": False,
                                "failed": True,
                                "error": "First response truncated due to output length limit"
                            }
                    
                    # Track actual token usage from response for context management
                    if hasattr(response, 'usage') and response.usage:
                        if self.api_mode == "codex_responses":
                            prompt_tokens = getattr(response.usage, 'input_tokens', 0) or 0
                            completion_tokens = getattr(response.usage, 'output_tokens', 0) or 0
                            total_tokens = (
                                getattr(response.usage, 'total_tokens', None)
                                or (prompt_tokens + completion_tokens)
                            )
                        else:
                            prompt_tokens = getattr(response.usage, 'prompt_tokens', 0) or 0
                            completion_tokens = getattr(response.usage, 'completion_tokens', 0) or 0
                            total_tokens = getattr(response.usage, 'total_tokens', 0) or 0
                        usage_dict = {
                            "prompt_tokens": prompt_tokens,
                            "completion_tokens": completion_tokens,
                            "total_tokens": total_tokens,
                        }
                        self.context_compressor.update_from_response(usage_dict)

                        # Cache discovered context length after successful call
                        if self.context_compressor._context_probed:
                            ctx = self.context_compressor.context_length
                            save_context_length(self.model, self.base_url, ctx)
                            print(f"{self.log_prefix}💾 Cached context length: {ctx:,} tokens for {self.model}")
                            self.context_compressor._context_probed = False

                        self.session_prompt_tokens += prompt_tokens
                        self.session_completion_tokens += completion_tokens
                        self.session_total_tokens += total_tokens
                        self.session_api_calls += 1
                        
                        if self.verbose_logging:
                            logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}")
                        
                        # Log cache hit stats when prompt caching is active
                        if self._use_prompt_caching:
                            details = getattr(response.usage, 'prompt_tokens_details', None)
                            cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0
                            written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0
                            prompt = usage_dict["prompt_tokens"]
                            hit_pct = (cached / prompt * 100) if prompt > 0 else 0
                            if not self.quiet_mode:
                                print(f"{self.log_prefix}   💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
                    
                    break  # Success, exit retry loop

                except InterruptedError:
                    if thinking_spinner:
                        thinking_spinner.stop("")
                        thinking_spinner = None
                    if self.thinking_callback:
                        self.thinking_callback("")
                    api_elapsed = time.time() - api_start_time
                    print(f"{self.log_prefix}⚡ Interrupted during API call.")
                    self._persist_session(messages, conversation_history)
                    interrupted = True
                    final_response = f"Operation interrupted: waiting for model response ({api_elapsed:.1f}s elapsed)."
                    break

                except Exception as api_error:
                    # Stop spinner before printing error messages
                    if thinking_spinner:
                        thinking_spinner.stop(f"(╥_╥) error, retrying...")
                        thinking_spinner = None
                    if self.thinking_callback:
                        self.thinking_callback("")

                    status_code = getattr(api_error, "status_code", None)
                    if (
                        self.api_mode == "codex_responses"
                        and self.provider == "openai-codex"
                        and status_code == 401
                        and not codex_auth_retry_attempted
                    ):
                        codex_auth_retry_attempted = True
                        if self._try_refresh_codex_client_credentials(force=True):
                            print(f"{self.log_prefix}🔐 Codex auth refreshed after 401. Retrying request...")
                            continue
                    if (
                        self.api_mode == "chat_completions"
                        and self.provider == "nous"
                        and status_code == 401
                        and not nous_auth_retry_attempted
                    ):
                        nous_auth_retry_attempted = True
                        if self._try_refresh_nous_client_credentials(force=True):
                            print(f"{self.log_prefix}🔐 Nous agent key refreshed after 401. Retrying request...")
                            continue

                    retry_count += 1
                    elapsed_time = time.time() - api_start_time
                    
                    # Enhanced error logging
                    error_type = type(api_error).__name__
                    error_msg = str(api_error).lower()
                    
                    print(f"{self.log_prefix}⚠️  API call failed (attempt {retry_count}/{max_retries}): {error_type}")
                    print(f"{self.log_prefix}   ⏱️  Time elapsed before failure: {elapsed_time:.2f}s")
                    print(f"{self.log_prefix}   📝 Error: {str(api_error)[:200]}")
                    print(f"{self.log_prefix}   📊 Request context: {len(api_messages)} messages, ~{approx_tokens:,} tokens, {len(self.tools) if self.tools else 0} tools")
                    
                    # Check for interrupt before deciding to retry
                    if self._interrupt_requested:
                        print(f"{self.log_prefix}⚡ Interrupt detected during error handling, aborting retries.")
                        self._persist_session(messages, conversation_history)
                        self.clear_interrupt()
                        return {
                            "final_response": f"Operation interrupted: handling API error ({error_type}: {str(api_error)[:80]}).",
                            "messages": messages,
                            "api_calls": api_call_count,
                            "completed": False,
                            "interrupted": True,
                        }
                    
                    # Check for 413 payload-too-large BEFORE generic 4xx handler.
                    # A 413 is a payload-size error — the correct response is to
                    # compress history and retry, not abort immediately.
                    status_code = getattr(api_error, "status_code", None)
                    is_payload_too_large = (
                        status_code == 413
                        or 'request entity too large' in error_msg
                        or 'payload too large' in error_msg
                        or 'error code: 413' in error_msg
                    )

                    if is_payload_too_large:
                        compression_attempts += 1
                        if compression_attempts > max_compression_attempts:
                            print(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.")
                            logging.error(f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
                            self._persist_session(messages, conversation_history)
                            return {
                                "messages": messages,
                                "completed": False,
                                "api_calls": api_call_count,
                                "error": f"Request payload too large: max compression attempts ({max_compression_attempts}) reached.",
                                "partial": True
                            }
                        print(f"{self.log_prefix}⚠️  Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")

                        original_len = len(messages)
                        messages, active_system_prompt = self._compress_context(
                            messages, system_message, approx_tokens=approx_tokens,
                            task_id=effective_task_id,
                        )

                        if len(messages) < original_len:
                            print(f"{self.log_prefix}   🗜️  Compressed {original_len} → {len(messages)} messages, retrying...")
                            time.sleep(2)  # Brief pause between compression retries
                            restart_with_compressed_messages = True
                            break
                        else:
                            print(f"{self.log_prefix}❌ Payload too large and cannot compress further.")
                            logging.error(f"{self.log_prefix}413 payload too large. Cannot compress further.")
                            self._persist_session(messages, conversation_history)
                            return {
                                "messages": messages,
                                "completed": False,
                                "api_calls": api_call_count,
                                "error": "Request payload too large (413). Cannot compress further.",
                                "partial": True
                            }

                    # Check for context-length errors BEFORE generic 4xx handler.
                    # Local backends (LM Studio, Ollama, llama.cpp) often return
                    # HTTP 400 with messages like "Context size has been exceeded"
                    # which must trigger compression, not an immediate abort.
                    is_context_length_error = any(phrase in error_msg for phrase in [
                        'context length', 'context size', 'maximum context',
                        'token limit', 'too many tokens', 'reduce the length',
                        'exceeds the limit', 'context window',
                        'request entity too large',  # OpenRouter/Nous 413 safety net
                        'prompt is too long',  # Anthropic: "prompt is too long: N tokens > M maximum"
                    ])
                    
                    if is_context_length_error:
                        compressor = self.context_compressor
                        old_ctx = compressor.context_length

                        # Try to parse the actual limit from the error message
                        parsed_limit = parse_context_limit_from_error(error_msg)
                        if parsed_limit and parsed_limit < old_ctx:
                            new_ctx = parsed_limit
                            print(f"{self.log_prefix}⚠️  Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})")
                        else:
                            # Step down to the next probe tier
                            new_ctx = get_next_probe_tier(old_ctx)

                        if new_ctx and new_ctx < old_ctx:
                            compressor.context_length = new_ctx
                            compressor.threshold_tokens = int(new_ctx * compressor.threshold_percent)
                            compressor._context_probed = True
                            print(f"{self.log_prefix}⚠️  Context length exceeded — stepping down: {old_ctx:,} → {new_ctx:,} tokens")
                        else:
                            print(f"{self.log_prefix}⚠️  Context length exceeded at minimum tier — attempting compression...")

                        compression_attempts += 1
                        if compression_attempts > max_compression_attempts:
                            print(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.")
                            logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
                            self._persist_session(messages, conversation_history)
                            return {
                                "messages": messages,
                                "completed": False,
                                "api_calls": api_call_count,
                                "error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
                                "partial": True
                            }
                        print(f"{self.log_prefix}   🗜️  Context compression attempt {compression_attempts}/{max_compression_attempts}...")

                        original_len = len(messages)
                        messages, active_system_prompt = self._compress_context(
                            messages, system_message, approx_tokens=approx_tokens,
                            task_id=effective_task_id,
                        )

                        if len(messages) < original_len or new_ctx and new_ctx < old_ctx:
                            if len(messages) < original_len:
                                print(f"{self.log_prefix}   🗜️  Compressed {original_len} → {len(messages)} messages, retrying...")
                            time.sleep(2)  # Brief pause between compression retries
                            restart_with_compressed_messages = True
                            break
                        else:
                            # Can't compress further and already at minimum tier
                            print(f"{self.log_prefix}❌ Context length exceeded and cannot compress further.")
                            print(f"{self.log_prefix}   💡 The conversation has accumulated too much content.")
                            logging.error(f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.")
                            self._persist_session(messages, conversation_history)
                            return {
                                "messages": messages,
                                "completed": False,
                                "api_calls": api_call_count,
                                "error": f"Context length exceeded ({approx_tokens:,} tokens). Cannot compress further.",
                                "partial": True
                            }

                    # Check for non-retryable client errors (4xx HTTP status codes).
                    # These indicate a problem with the request itself (bad model ID,
                    # invalid API key, forbidden, etc.) and will never succeed on retry.
                    # Note: 413 and context-length errors are excluded — handled above.
                    # Also catch local validation errors (ValueError, TypeError) — these
                    # are programming bugs, not transient failures.
                    is_local_validation_error = isinstance(api_error, (ValueError, TypeError))
                    is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413
                    is_client_error = (is_local_validation_error or is_client_status_error or any(phrase in error_msg for phrase in [
                        'error code: 401', 'error code: 403',
                        'error code: 404', 'error code: 422',
                        'is not a valid model', 'invalid model', 'model not found',
                        'invalid api key', 'invalid_api_key', 'authentication',
                        'unauthorized', 'forbidden', 'not found',
                    ])) and not is_context_length_error

                    if is_client_error:
                        # Try fallback before aborting — a different provider
                        # may not have the same issue (rate limit, auth, etc.)
                        if self._try_activate_fallback():
                            retry_count = 0
                            continue
                        self._dump_api_request_debug(
                            api_kwargs, reason="non_retryable_client_error", error=api_error,
                        )
                        print(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.")
                        print(f"{self.log_prefix}   💡 This type of error won't be fixed by retrying.")
                        logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
                        self._persist_session(messages, conversation_history)
                        return {
                            "final_response": None,
                            "messages": messages,
                            "api_calls": api_call_count,
                            "completed": False,
                            "failed": True,
                            "error": str(api_error),
                        }

                    if retry_count >= max_retries:
                        # Try fallback before giving up entirely
                        if self._try_activate_fallback():
                            retry_count = 0
                            continue
                        print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.")
                        logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")
                        logging.error(f"{self.log_prefix}Request details - Messages: {len(api_messages)}, Approx tokens: {approx_tokens:,}")
                        raise api_error

                    wait_time = min(2 ** retry_count, 60)  # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s, 60s
                    logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}")
                    if retry_count >= max_retries:
                        print(f"{self.log_prefix}⚠️  API call failed after {retry_count} attempts: {str(api_error)[:100]}")
                        print(f"{self.log_prefix}⏳ Final retry in {wait_time}s...")
                    
                    # Sleep in small increments so we can respond to interrupts quickly
                    # instead of blocking the entire wait_time in one sleep() call
                    sleep_end = time.time() + wait_time
                    while time.time() < sleep_end:
                        if self._interrupt_requested:
                            print(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
                            self._persist_session(messages, conversation_history)
                            self.clear_interrupt()
                            return {
                                "final_response": f"Operation interrupted: retrying API call after error (retry {retry_count}/{max_retries}).",
                                "messages": messages,
                                "api_calls": api_call_count,
                                "completed": False,
                                "interrupted": True,
                            }
                        time.sleep(0.2)  # Check interrupt every 200ms
            
            # If the API call was interrupted, skip response processing
            if interrupted:
                break

            if restart_with_compressed_messages:
                api_call_count -= 1
                self.iteration_budget.refund()
                continue

            if restart_with_length_continuation:
                continue

            # Guard: if all retries exhausted without a successful response
            # (e.g. repeated context-length errors that exhausted retry_count),
            # the `response` variable is still None. Break out cleanly.
            if response is None:
                print(f"{self.log_prefix}❌ All API retries exhausted with no successful response.")
                self._persist_session(messages, conversation_history)
                break

            try:
                if self.api_mode == "codex_responses":
                    assistant_message, finish_reason = self._normalize_codex_response(response)
                else:
                    assistant_message = response.choices[0].message
                
                # Normalize content to string — some OpenAI-compatible servers
                # (llama-server, etc.) return content as a dict or list instead
                # of a plain string, which crashes downstream .strip() calls.
                if assistant_message.content is not None and not isinstance(assistant_message.content, str):
                    raw = assistant_message.content
                    if isinstance(raw, dict):
                        assistant_message.content = raw.get("text", "") or raw.get("content", "") or json.dumps(raw)
                    elif isinstance(raw, list):
                        # Multimodal content list — extract text parts
                        parts = []
                        for part in raw:
                            if isinstance(part, str):
                                parts.append(part)
                            elif isinstance(part, dict) and part.get("type") == "text":
                                parts.append(part.get("text", ""))
                            elif isinstance(part, dict) and "text" in part:
                                parts.append(str(part["text"]))
                        assistant_message.content = "\n".join(parts)
                    else:
                        assistant_message.content = str(raw)

                # Handle assistant response
                if assistant_message.content and not self.quiet_mode:
                    print(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")

                # Notify progress callback of model's thinking (used by subagent
                # delegation to relay the child's reasoning to the parent display).
                # Guard: only fire for subagents (_delegate_depth >= 1) to avoid
                # spamming gateway platforms with the main agent's every thought.
                if (assistant_message.content and self.tool_progress_callback
                        and getattr(self, '_delegate_depth', 0) > 0):
                    _think_text = assistant_message.content.strip()
                    # Strip reasoning XML tags that shouldn't leak to parent display
                    _think_text = re.sub(
                        r'</?(?:REASONING_SCRATCHPAD|think|reasoning)>', '', _think_text
                    ).strip()
                    first_line = _think_text.split('\n')[0][:80] if _think_text else ""
                    if first_line:
                        try:
                            self.tool_progress_callback("_thinking", first_line)
                        except Exception:
                            pass
                
                # Check for incomplete <REASONING_SCRATCHPAD> (opened but never closed)
                # This means the model ran out of output tokens mid-reasoning — retry up to 2 times
                if has_incomplete_scratchpad(assistant_message.content or ""):
                    if not hasattr(self, '_incomplete_scratchpad_retries'):
                        self._incomplete_scratchpad_retries = 0
                    self._incomplete_scratchpad_retries += 1
                    
                    print(f"{self.log_prefix}⚠️  Incomplete <REASONING_SCRATCHPAD> detected (opened but never closed)")
                    
                    if self._incomplete_scratchpad_retries <= 2:
                        print(f"{self.log_prefix}🔄 Retrying API call ({self._incomplete_scratchpad_retries}/2)...")
                        # Don't add the broken message, just retry
                        continue
                    else:
                        # Max retries - discard this turn and save as partial
                        print(f"{self.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.")
                        self._incomplete_scratchpad_retries = 0
                        
                        rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
                        self._cleanup_task_resources(effective_task_id)
                        self._persist_session(messages, conversation_history)
                        
                        return {
                            "final_response": None,
                            "messages": rolled_back_messages,
                            "api_calls": api_call_count,
                            "completed": False,
                            "partial": True,
                            "error": "Incomplete REASONING_SCRATCHPAD after 2 retries"
                        }
                
                # Reset incomplete scratchpad counter on clean response
                if hasattr(self, '_incomplete_scratchpad_retries'):
                    self._incomplete_scratchpad_retries = 0

                if self.api_mode == "codex_responses" and finish_reason == "incomplete":
                    if not hasattr(self, "_codex_incomplete_retries"):
                        self._codex_incomplete_retries = 0
                    self._codex_incomplete_retries += 1

                    interim_msg = self._build_assistant_message(assistant_message, finish_reason)
                    interim_has_content = bool((interim_msg.get("content") or "").strip())
                    interim_has_reasoning = bool(interim_msg.get("reasoning", "").strip()) if isinstance(interim_msg.get("reasoning"), str) else False

                    if interim_has_content or interim_has_reasoning:
                        last_msg = messages[-1] if messages else None
                        duplicate_interim = (
                            isinstance(last_msg, dict)
                            and last_msg.get("role") == "assistant"
                            and last_msg.get("finish_reason") == "incomplete"
                            and (last_msg.get("content") or "") == (interim_msg.get("content") or "")
                            and (last_msg.get("reasoning") or "") == (interim_msg.get("reasoning") or "")
                        )
                        if not duplicate_interim:
                            messages.append(interim_msg)

                    if self._codex_incomplete_retries < 3:
                        if not self.quiet_mode:
                            print(f"{self.log_prefix}↻ Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)")
                        self._session_messages = messages
                        self._save_session_log(messages)
                        continue

                    self._codex_incomplete_retries = 0
                    self._persist_session(messages, conversation_history)
                    return {
                        "final_response": None,
                        "messages": messages,
                        "api_calls": api_call_count,
                        "completed": False,
                        "partial": True,
                        "error": "Codex response remained incomplete after 3 continuation attempts",
                    }
                elif hasattr(self, "_codex_incomplete_retries"):
                    self._codex_incomplete_retries = 0
                
                # Check for tool calls
                if assistant_message.tool_calls:
                    if not self.quiet_mode:
                        print(f"{self.log_prefix}🔧 Processing {len(assistant_message.tool_calls)} tool call(s)...")
                    
                    if self.verbose_logging:
                        for tc in assistant_message.tool_calls:
                            logging.debug(f"Tool call: {tc.function.name} with args: {tc.function.arguments[:200]}...")
                    
                    # Validate tool call names - detect model hallucinations
                    # Repair mismatched tool names before validating
                    for tc in assistant_message.tool_calls:
                        if tc.function.name not in self.valid_tool_names:
                            repaired = self._repair_tool_call(tc.function.name)
                            if repaired:
                                print(f"{self.log_prefix}🔧 Auto-repaired tool name: '{tc.function.name}' -> '{repaired}'")
                                tc.function.name = repaired
                    invalid_tool_calls = [
                        tc.function.name for tc in assistant_message.tool_calls
                        if tc.function.name not in self.valid_tool_names
                    ]
                    if invalid_tool_calls:
                        # Return helpful error to model — model can self-correct next turn
                        available = ", ".join(sorted(self.valid_tool_names))
                        invalid_name = invalid_tool_calls[0]
                        invalid_preview = invalid_name[:80] + "..." if len(invalid_name) > 80 else invalid_name
                        print(f"{self.log_prefix}⚠️  Unknown tool '{invalid_preview}' — sending error to model for self-correction")
                        assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
                        messages.append(assistant_msg)
                        for tc in assistant_message.tool_calls:
                            if tc.function.name not in self.valid_tool_names:
                                content = f"Tool '{tc.function.name}' does not exist. Available tools: {available}"
                            else:
                                content = f"Skipped: another tool call in this turn used an invalid name. Please retry this tool call."
                            messages.append({
                                "role": "tool",
                                "tool_call_id": tc.id,
                                "content": content,
                            })
                        continue
                    # Reset retry counter on successful tool call validation
                    if hasattr(self, '_invalid_tool_retries'):
                        self._invalid_tool_retries = 0
                    
                    # Validate tool call arguments are valid JSON
                    # Handle empty strings as empty objects (common model quirk)
                    invalid_json_args = []
                    for tc in assistant_message.tool_calls:
                        args = tc.function.arguments
                        # Treat empty/whitespace strings as empty object
                        if not args or not args.strip():
                            tc.function.arguments = "{}"
                            continue
                        try:
                            json.loads(args)
                        except json.JSONDecodeError as e:
                            invalid_json_args.append((tc.function.name, str(e)))
                    
                    if invalid_json_args:
                        # Track retries for invalid JSON arguments
                        self._invalid_json_retries += 1
                        
                        tool_name, error_msg = invalid_json_args[0]
                        print(f"{self.log_prefix}⚠️  Invalid JSON in tool call arguments for '{tool_name}': {error_msg}")
                        
                        if self._invalid_json_retries < 3:
                            print(f"{self.log_prefix}🔄 Retrying API call ({self._invalid_json_retries}/3)...")
                            # Don't add anything to messages, just retry the API call
                            continue
                        else:
                            # Instead of returning partial, inject a helpful message and let model recover
                            print(f"{self.log_prefix}⚠️  Injecting recovery message for invalid JSON...")
                            self._invalid_json_retries = 0  # Reset for next attempt
                            
                            # Add a user message explaining the issue
                            recovery_msg = (
                                f"Your tool call to '{tool_name}' had invalid JSON arguments. "
                                f"Error: {error_msg}. "
                                f"For tools with no required parameters, use an empty object: {{}}. "
                                f"Please either retry the tool call with valid JSON, or respond without using that tool."
                            )
                            recovery_dict = {"role": "user", "content": recovery_msg}
                            messages.append(recovery_dict)
                            continue
                    
                    # Reset retry counter on successful JSON validation
                    self._invalid_json_retries = 0
                    
                    assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
                    
                    # If this turn has both content AND tool_calls, capture the content
                    # as a fallback final response. Common pattern: model delivers its
                    # answer and calls memory/skill tools as a side-effect in the same
                    # turn. If the follow-up turn after tools is empty, we use this.
                    turn_content = assistant_message.content or ""
                    if turn_content and self._has_content_after_think_block(turn_content):
                        self._last_content_with_tools = turn_content
                        # Show intermediate commentary so the user can follow along
                        if self.quiet_mode:
                            clean = self._strip_think_blocks(turn_content).strip()
                            if clean:
                                print(f"  ┊ 💬 {clean}")
                    
                    messages.append(assistant_msg)
                    
                    _msg_count_before_tools = len(messages)
                    self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)

                    # Refund the iteration if the ONLY tool(s) called were
                    # execute_code (programmatic tool calling).  These are
                    # cheap RPC-style calls that shouldn't eat the budget.
                    _tc_names = {tc.function.name for tc in assistant_message.tool_calls}
                    if _tc_names == {"execute_code"}:
                        self.iteration_budget.refund()
                    
                    # Estimate next prompt size using real token counts from the
                    # last API response + rough estimate of newly appended tool
                    # results.  This catches cases where tool results push the
                    # context past the limit that last_prompt_tokens alone misses
                    # (e.g. large file reads, web extractions).
                    _compressor = self.context_compressor
                    _new_tool_msgs = messages[_msg_count_before_tools:]
                    _new_chars = sum(len(str(m.get("content", "") or "")) for m in _new_tool_msgs)
                    _estimated_next_prompt = (
                        _compressor.last_prompt_tokens
                        + _compressor.last_completion_tokens
                        + _new_chars // 3  # conservative: JSON-heavy tool results ≈ 3 chars/token
                    )
                    if self.compression_enabled and _compressor.should_compress(_estimated_next_prompt):
                        messages, active_system_prompt = self._compress_context(
                            messages, system_message,
                            approx_tokens=self.context_compressor.last_prompt_tokens,
                            task_id=effective_task_id,
                        )
                    
                    # Save session log incrementally (so progress is visible even if interrupted)
                    self._session_messages = messages
                    self._save_session_log(messages)
                    
                    # Continue loop for next response
                    continue
                
                else:
                    # No tool calls - this is the final response
                    final_response = assistant_message.content or ""
                    
                    # Check if response only has think block with no actual content after it
                    if not self._has_content_after_think_block(final_response):
                        # If the previous turn already delivered real content alongside
                        # tool calls (e.g. "You're welcome!" + memory save), the model
                        # has nothing more to say. Use the earlier content immediately
                        # instead of wasting API calls on retries that won't help.
                        fallback = getattr(self, '_last_content_with_tools', None)
                        if fallback:
                            logger.debug("Empty follow-up after tool calls — using prior turn content as final response")
                            self._last_content_with_tools = None
                            self._empty_content_retries = 0
                            for i in range(len(messages) - 1, -1, -1):
                                msg = messages[i]
                                if msg.get("role") == "assistant" and msg.get("tool_calls"):
                                    tool_names = []
                                    for tc in msg["tool_calls"]:
                                        fn = tc.get("function", {})
                                        tool_names.append(fn.get("name", "unknown"))
                                    msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
                                    break
                            final_response = self._strip_think_blocks(fallback).strip()
                            break

                        # No fallback available — this is a genuine empty response.
                        # Retry in case the model just had a bad generation.
                        if not hasattr(self, '_empty_content_retries'):
                            self._empty_content_retries = 0
                        self._empty_content_retries += 1
                        
                        reasoning_text = self._extract_reasoning(assistant_message)
                        print(f"{self.log_prefix}⚠️  Response only contains think block with no content after it")
                        if reasoning_text:
                            reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
                            print(f"{self.log_prefix}   Reasoning: {reasoning_preview}")
                        else:
                            content_preview = final_response[:80] + "..." if len(final_response) > 80 else final_response
                            print(f"{self.log_prefix}   Content: '{content_preview}'")
                        
                        if self._empty_content_retries < 3:
                            print(f"{self.log_prefix}🔄 Retrying API call ({self._empty_content_retries}/3)...")
                            continue
                        else:
                            print(f"{self.log_prefix}❌ Max retries (3) for empty content exceeded.")
                            self._empty_content_retries = 0
                            
                            # If a prior tool_calls turn had real content, salvage it:
                            # rewrite that turn's content to a brief tool description,
                            # and use the original content as the final response here.
                            fallback = getattr(self, '_last_content_with_tools', None)
                            if fallback:
                                self._last_content_with_tools = None
                                # Find the last assistant message with tool_calls and rewrite it
                                for i in range(len(messages) - 1, -1, -1):
                                    msg = messages[i]
                                    if msg.get("role") == "assistant" and msg.get("tool_calls"):
                                        tool_names = []
                                        for tc in msg["tool_calls"]:
                                            fn = tc.get("function", {})
                                            tool_names.append(fn.get("name", "unknown"))
                                        msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
                                        break
                                # Strip <think> blocks from fallback content for user display
                                final_response = self._strip_think_blocks(fallback).strip()
                                break
                            
                            # No fallback -- append the empty message as-is
                            empty_msg = {
                                "role": "assistant",
                                "content": final_response,
                                "reasoning": reasoning_text,
                                "finish_reason": finish_reason,
                            }
                            messages.append(empty_msg)
                            
                            self._cleanup_task_resources(effective_task_id)
                            self._persist_session(messages, conversation_history)
                            
                            return {
                                "final_response": final_response or None,
                                "messages": messages,
                                "api_calls": api_call_count,
                                "completed": False,
                                "partial": True,
                                "error": "Model generated only think blocks with no actual response after 3 retries"
                            }
                    
                    # Reset retry counter on successful content
                    if hasattr(self, '_empty_content_retries'):
                        self._empty_content_retries = 0

                    if (
                        self.api_mode == "codex_responses"
                        and self.valid_tool_names
                        and codex_ack_continuations < 2
                        and self._looks_like_codex_intermediate_ack(
                            user_message=user_message,
                            assistant_content=final_response,
                            messages=messages,
                        )
                    ):
                        codex_ack_continuations += 1
                        interim_msg = self._build_assistant_message(assistant_message, "incomplete")
                        messages.append(interim_msg)

                        continue_msg = {
                            "role": "user",
                            "content": (
                                "[System: Continue now. Execute the required tool calls and only "
                                "send your final answer after completing the task.]"
                            ),
                        }
                        messages.append(continue_msg)
                        self._session_messages = messages
                        self._save_session_log(messages)
                        continue

                    codex_ack_continuations = 0

                    if truncated_response_prefix:
                        final_response = truncated_response_prefix + final_response
                    
                    # Strip <think> blocks from user-facing response (keep raw in messages for trajectory)
                    final_response = self._strip_think_blocks(final_response).strip()
                    
                    final_msg = self._build_assistant_message(assistant_message, finish_reason)
                    
                    messages.append(final_msg)
                    
                    if not self.quiet_mode:
                        print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
                    break
                
            except Exception as e:
                error_msg = f"Error during OpenAI-compatible API call #{api_call_count}: {str(e)}"
                print(f"❌ {error_msg}")
                
                if self.verbose_logging:
                    logging.exception("Detailed error information:")
                
                # If an assistant message with tool_calls was already appended,
                # the API expects a role="tool" result for every tool_call_id.
                # Fill in error results for any that weren't answered yet.
                pending_handled = False
                for idx in range(len(messages) - 1, -1, -1):
                    msg = messages[idx]
                    if not isinstance(msg, dict):
                        break
                    if msg.get("role") == "tool":
                        continue
                    if msg.get("role") == "assistant" and msg.get("tool_calls"):
                        answered_ids = {
                            m["tool_call_id"]
                            for m in messages[idx + 1:]
                            if isinstance(m, dict) and m.get("role") == "tool"
                        }
                        for tc in msg["tool_calls"]:
                            if tc["id"] not in answered_ids:
                                err_msg = {
                                    "role": "tool",
                                    "tool_call_id": tc["id"],
                                    "content": f"Error executing tool: {error_msg}",
                                }
                                messages.append(err_msg)
                        pending_handled = True
                    break
                
                if not pending_handled:
                    # Error happened before tool processing (e.g. response parsing).
                    # Use a user-role message so the model can see what went wrong
                    # without confusing the API with a fabricated assistant turn.
                    sys_err_msg = {
                        "role": "user",
                        "content": f"[System error during processing: {error_msg}]",
                    }
                    messages.append(sys_err_msg)
                
                # If we're near the limit, break to avoid infinite loops
                if api_call_count >= self.max_iterations - 1:
                    final_response = f"I apologize, but I encountered repeated errors: {error_msg}"
                    break
        
        if final_response is None and (
            api_call_count >= self.max_iterations
            or self.iteration_budget.remaining <= 0
        ):
            if self.iteration_budget.remaining <= 0 and not self.quiet_mode:
                print(f"\n⚠️  Session iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} used, including subagents)")
            final_response = self._handle_max_iterations(messages, api_call_count)
        
        # Determine if conversation completed successfully
        completed = final_response is not None and api_call_count < self.max_iterations

        # Save trajectory if enabled
        self._save_trajectory(messages, user_message, completed)

        # Clean up VM and browser for this task after conversation completes
        self._cleanup_task_resources(effective_task_id)

        # Persist session to both JSON log and SQLite
        self._persist_session(messages, conversation_history)

        # Sync conversation to Honcho for user modeling
        if final_response and not interrupted:
            self._honcho_sync(original_user_message, final_response)

        # Extract reasoning from the last assistant message (if any)
        last_reasoning = None
        for msg in reversed(messages):
            if msg.get("role") == "assistant" and msg.get("reasoning"):
                last_reasoning = msg["reasoning"]
                break

        # Build result with interrupt info if applicable
        result = {
            "final_response": final_response,
            "last_reasoning": last_reasoning,
            "messages": messages,
            "api_calls": api_call_count,
            "completed": completed,
            "partial": False,  # True only when stopped due to invalid tool calls
            "interrupted": interrupted,
        }
        
        # Include interrupt message if one triggered the interrupt
        if interrupted and self._interrupt_message:
            result["interrupt_message"] = self._interrupt_message
        
        # Clear interrupt state after handling
        self.clear_interrupt()
        
        return result
    
    def chat(self, message: str) -> str:
        """
        Send a single user message and hand back only the assistant's reply text.

        This is a convenience wrapper over ``run_conversation``: the full result
        dictionary is produced as usual, but everything except its
        ``"final_response"`` entry is discarded.

        Args:
            message (str): The user's message for this turn.

        Returns:
            str: The value stored under ``"final_response"`` in the
                conversation result.
        """
        return self.run_conversation(message)["final_response"]


def _parse_toolset_arg(value) -> Optional[List[str]]:
    """Normalize a toolset command-line argument into a list of toolset names.

    Python Fire evaluates a bare ``web,vision`` argument as the tuple
    ``('web', 'vision')`` instead of the string ``"web,vision"``, so this
    accepts either a comma-separated string or an already-split list/tuple.

    Args:
        value: Comma-separated string, list/tuple of names, or a falsy value.

    Returns:
        Stripped, non-empty toolset names in their original order, or None
        when no names were supplied.
    """
    if not value:
        return None
    if isinstance(value, str):
        raw_names = value.split(",")
    elif isinstance(value, (list, tuple)):
        raw_names = value
    else:
        # A single non-string scalar (e.g. Fire parsed a bare number).
        raw_names = [value]
    stripped = (str(name).strip() for name in raw_names)
    names = [name for name in stripped if name]
    return names or None


def _print_tool_listing() -> None:
    """Print the available toolsets, individual tools, and CLI usage examples."""
    # Imported lazily so the listing helpers are only loaded when requested.
    from model_tools import get_all_tool_names, get_toolset_for_tool, get_available_toolsets
    from toolsets import get_all_toolsets, get_toolset_info

    basic_names = ("web", "terminal", "vision", "creative", "reasoning")
    composite_names = ("research", "development", "analysis", "content_creation", "full_stack")

    print("📋 Available Tools & Toolsets:")
    print("-" * 50)

    # Show new toolsets system
    print("\n🎯 Predefined Toolsets (New System):")
    print("-" * 40)

    # Group by category; anything not explicitly basic/composite is a scenario toolset.
    basic_toolsets = []
    composite_toolsets = []
    scenario_toolsets = []
    for name, _toolset in get_all_toolsets().items():
        info = get_toolset_info(name)
        if not info:
            continue
        entry = (name, info)
        if name in basic_names:
            basic_toolsets.append(entry)
        elif name in composite_names:
            composite_toolsets.append(entry)
        else:
            scenario_toolsets.append(entry)

    print("\n📌 Basic Toolsets:")
    for name, info in basic_toolsets:
        tools_str = ', '.join(info['resolved_tools']) if info['resolved_tools'] else 'none'
        print(f"  • {name:15} - {info['description']}")
        print(f"    Tools: {tools_str}")

    print("\n📂 Composite Toolsets (built from other toolsets):")
    for name, info in composite_toolsets:
        includes_str = ', '.join(info['includes']) if info['includes'] else 'none'
        print(f"  • {name:15} - {info['description']}")
        print(f"    Includes: {includes_str}")
        print(f"    Total tools: {info['tool_count']}")

    print("\n🎭 Scenario-Specific Toolsets:")
    for name, info in scenario_toolsets:
        print(f"  • {name:20} - {info['description']}")
        print(f"    Total tools: {info['tool_count']}")

    # Show legacy toolset compatibility
    print("\n📦 Legacy Toolsets (for backward compatibility):")
    for name, info in get_available_toolsets().items():
        status = "✅" if info["available"] else "❌"
        print(f"  {status} {name}: {info['description']}")
        if not info["available"]:
            print(f"    Requirements: {', '.join(info['requirements'])}")

    # Show individual tools
    all_tools = get_all_tool_names()
    print(f"\n🔧 Individual Tools ({len(all_tools)} available):")
    for tool_name in sorted(all_tools):
        toolset = get_toolset_for_tool(tool_name)
        print(f"  📌 {tool_name} (from {toolset})")

    print("\n💡 Usage Examples:")
    print("  # Use predefined toolsets")
    print("  python run_agent.py --enabled_toolsets=research --query='search for Python news'")
    print("  python run_agent.py --enabled_toolsets=development --query='debug this code'")
    print("  python run_agent.py --enabled_toolsets=safe --query='analyze without terminal'")
    print("  ")
    print("  # Combine multiple toolsets")
    print("  python run_agent.py --enabled_toolsets=web,vision --query='analyze website'")
    print("  ")
    print("  # Disable toolsets")
    print("  python run_agent.py --disabled_toolsets=terminal --query='no command execution'")
    print("  ")
    print("  # Run with trajectory saving enabled")
    print("  python run_agent.py --save_trajectories --query='your question here'")


def main(
    query: Optional[str] = None,
    model: str = "anthropic/claude-opus-4.6",
    api_key: Optional[str] = None,
    base_url: str = "https://openrouter.ai/api/v1",
    max_turns: int = 10,
    enabled_toolsets: Optional[str] = None,
    disabled_toolsets: Optional[str] = None,
    list_tools: bool = False,
    save_trajectories: bool = False,
    save_sample: bool = False,
    verbose: bool = False,
    log_prefix_chars: int = 20
):
    """
    Main function for running the agent directly.

    Args:
        query (str): Natural language query for the agent. Defaults to Python 3.13 example.
        model (str): Model name to use (OpenRouter format: provider/model). Defaults to anthropic/claude-opus-4.6.
        api_key (str): API key for authentication. Uses OPENROUTER_API_KEY env var if not provided.
        base_url (str): Base URL for the model API. Defaults to https://openrouter.ai/api/v1
        max_turns (int): Maximum number of API call iterations. Defaults to 10.
        enabled_toolsets (str): Comma-separated list of toolsets to enable. Supports predefined
                              toolsets (e.g., "research", "development", "safe").
                              Multiple toolsets can be combined: "web,vision". A list/tuple of
                              names is also accepted, because Python Fire parses a bare
                              ``web,vision`` argument into a tuple.
        disabled_toolsets (str): Comma-separated list of toolsets to disable (e.g., "terminal").
                              Accepts the same shapes as ``enabled_toolsets``.
        list_tools (bool): Just list available tools and exit
        save_trajectories (bool): Save conversation trajectories to JSONL files (appends to trajectory_samples.jsonl). Defaults to False.
        save_sample (bool): Save a single trajectory sample to a UUID-named JSON file (sample_<id>.json) for inspection. Defaults to False.
        verbose (bool): Enable verbose logging for debugging. Defaults to False.
        log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses. Defaults to 20.

    Returns:
        None. All output is printed; the optional sample file is written to the
        current working directory.

    Toolset Examples:
        - "research": Web search, extract, crawl + vision tools
    """
    print("🤖 AI Agent with Tool Calling")
    print("=" * 50)

    # Handle tool listing
    if list_tools:
        _print_tool_listing()
        return

    # Parse toolset selection arguments (string or Fire-produced list/tuple)
    enabled_toolsets_list = _parse_toolset_arg(enabled_toolsets)
    disabled_toolsets_list = _parse_toolset_arg(disabled_toolsets)

    if enabled_toolsets_list:
        print(f"🎯 Enabled toolsets: {enabled_toolsets_list}")

    if disabled_toolsets_list:
        print(f"🚫 Disabled toolsets: {disabled_toolsets_list}")

    if save_trajectories:
        print("💾 Trajectory saving: ENABLED")
        print("   - Successful conversations → trajectory_samples.jsonl")
        print("   - Failed conversations → failed_trajectories.jsonl")

    # Initialize agent with provided parameters
    try:
        agent = AIAgent(
            base_url=base_url,
            model=model,
            api_key=api_key,
            max_iterations=max_turns,
            enabled_toolsets=enabled_toolsets_list,
            disabled_toolsets=disabled_toolsets_list,
            save_trajectories=save_trajectories,
            verbose_logging=verbose,
            log_prefix_chars=log_prefix_chars
        )
    except RuntimeError as e:
        print(f"❌ Failed to initialize agent: {e}")
        return

    # Use provided query or default to Python 3.13 example
    if query is None:
        user_query = (
            "Tell me about the latest developments in Python 3.13 and what new features "
            "developers should know about. Please search for current information and try it out."
        )
    else:
        user_query = query

    print(f"\n📝 User Query: {user_query}")
    print("\n" + "=" * 50)

    # Run conversation
    result = agent.run_conversation(user_query)

    print("\n" + "=" * 50)
    print("📋 CONVERSATION SUMMARY")
    print("=" * 50)
    print(f"✅ Completed: {result['completed']}")
    print(f"📞 API Calls: {result['api_calls']}")
    print(f"💬 Messages: {len(result['messages'])}")

    if result['final_response']:
        print("\n🎯 FINAL RESPONSE:")
        print("-" * 30)
        print(result['final_response'])

    # Save sample trajectory to UUID-named file if requested
    if save_sample:
        sample_id = str(uuid.uuid4())[:8]
        sample_filename = f"sample_{sample_id}.json"

        # Convert messages to trajectory format (same as batch_runner)
        trajectory = agent._convert_to_trajectory_format(
            result['messages'],
            user_query,
            result['completed']
        )

        entry = {
            "conversations": trajectory,
            "timestamp": datetime.now().isoformat(),
            "model": model,
            "completed": result['completed'],
            "query": user_query
        }

        # Best-effort: a failed sample write must not abort the run summary.
        try:
            with open(sample_filename, "w", encoding="utf-8") as f:
                # Pretty-print JSON with indent for readability
                f.write(json.dumps(entry, ensure_ascii=False, indent=2))
            print(f"\n💾 Sample trajectory saved to: {sample_filename}")
        except Exception as e:
            print(f"\n⚠️ Failed to save sample: {e}")

    print("\n👋 Agent execution completed!")


# Script entry point: when run directly, hand main() to Python Fire so its
# keyword parameters can be supplied as command-line flags
# (e.g. --query=... --enabled_toolsets=research).
if __name__ == "__main__":
    fire.Fire(main)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								#!/usr/bin/env python3
 								"""
 								AI Agent Runner with Tool Calling
 								This module provides a clean, standalone agent that can execute AI models
 								with tool calling capabilities. It handles the conversation loop, tool execution,
 								and response management.
 								Features:
 								- Automatic tool calling loop until completion
 								- Configurable model parameters
 								- Error handling and recovery
 								- Message history management
 								- Support for multiple model providers
 								Usage:
 								    from run_agent import AIAgent
 								    agent = AIAgent(base_url="http://localhost:30000/v1", model="claude-opus-4-20250514")
 								    response = agent.run_conversation("Tell me about the latest Python updates")
 								"""
-												Implement Anthropic prompt caching for Claude models via OpenRouter

- Introduced a caching strategy that reduces input token costs by ~75% on multi-turn conversations by caching the conversation prefix.
- Added functions to apply cache control markers to messages, enhancing efficiency in token usage.
- Updated AIAgent to auto-enable prompt caching for Claude models, with configurable cache TTL.
- Enhanced logging to track cache hit statistics when caching is active, improving monitoring of token usage.

											
										
										
											2026-02-10 06:49:41 +00:00
+								import copy
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								import hashlib
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								import json
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								import logging
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								logger = logging.getLogger(__name__)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								import os
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								import random
-												Cleanup time!

											
										
										
											2026-02-20 23:23:32 -08:00
+								import re
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								import sys
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								import time
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								import threading
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								from types import SimpleNamespace
-												Enhance session logging and interactive sudo support

- Implemented automatic session logging, saving conversation trajectories to the `logs/` directory in JSON format, with each session having a unique identifier.
- Updated the CLI to display the session ID in the welcome banner for easy reference.
- Introduced an interactive sudo password prompt in CLI mode, allowing users to enter their password with a 45-second timeout, enhancing user experience during command execution.
- Documented session logging and interactive sudo features in `README.md`, `cli.md`, and `cli-config.yaml.example` for better user guidance.

											
										
										
											2026-02-01 15:36:26 -08:00
+								import uuid
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								from typing import List, Dict, Any, Optional
 								from openai import OpenAI
 								import fire
 								from datetime import datetime
-												Add environment configuration file, restructure tool imports, and enhance README setup instructions

											
										
										
											2025-10-01 09:54:17 +00:00
+								from pathlib import Path
-												fix: resolve .env and config paths from HERMES_HOME, not PROJECT_ROOT

The `hermes` CLI entry point (hermes_cli/main.py) and the agent runner
(run_agent.py) only loaded .env from the project installation directory.
After the standard installer, code lives at ~/.hermes/hermes-agent/ but
config lives at ~/.hermes/ — so the .env was never found.

Aligns these entry points with the pattern already used by gateway/run.py
and rl_cli.py: load ~/.hermes/.env first, fall back to project root .env
for dev-mode compatibility.

Also fixes:
- status.py checking .env existence and API keys at PROJECT_ROOT
- doctor.py KeyError on tool availability (missing_vars vs env_vars)
- doctor.py checking logs/ and Skills Hub at PROJECT_ROOT instead of HERMES_HOME
- doctor.py redundant logs/ check (already covered by subdirectory loop)
- mini-swe-agent loading config from platformdirs default instead of ~/.hermes/

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 16:49:14 +11:00
+								# Load .env from ~/.hermes/.env first, then project root as dev fallback
-												Add environment configuration file, restructure tool imports, and enhance README setup instructions

											
										
										
											2025-10-01 09:54:17 +00:00
+								from dotenv import load_dotenv
-												fix: resolve .env and config paths from HERMES_HOME, not PROJECT_ROOT

The `hermes` CLI entry point (hermes_cli/main.py) and the agent runner
(run_agent.py) only loaded .env from the project installation directory.
After the standard installer, code lives at ~/.hermes/hermes-agent/ but
config lives at ~/.hermes/ — so the .env was never found.

Aligns these entry points with the pattern already used by gateway/run.py
and rl_cli.py: load ~/.hermes/.env first, fall back to project root .env
for dev-mode compatibility.

Also fixes:
- status.py checking .env existence and API keys at PROJECT_ROOT
- doctor.py KeyError on tool availability (missing_vars vs env_vars)
- doctor.py checking logs/ and Skills Hub at PROJECT_ROOT instead of HERMES_HOME
- doctor.py redundant logs/ check (already covered by subdirectory loop)
- mini-swe-agent loading config from platformdirs default instead of ~/.hermes/

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 16:49:14 +11:00
+								_hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
 								_user_env = _hermes_home / ".env"
 								_project_env = Path(__file__).parent / '.env'
 								if _user_env.exists():
-												More fixes for windoze

											
										
										
											2026-02-25 15:20:42 -08:00
+								    try:
-												fix: resolve .env and config paths from HERMES_HOME, not PROJECT_ROOT

The `hermes` CLI entry point (hermes_cli/main.py) and the agent runner
(run_agent.py) only loaded .env from the project installation directory.
After the standard installer, code lives at ~/.hermes/hermes-agent/ but
config lives at ~/.hermes/ — so the .env was never found.

Aligns these entry points with the pattern already used by gateway/run.py
and rl_cli.py: load ~/.hermes/.env first, fall back to project root .env
for dev-mode compatibility.

Also fixes:
- status.py checking .env existence and API keys at PROJECT_ROOT
- doctor.py KeyError on tool availability (missing_vars vs env_vars)
- doctor.py checking logs/ and Skills Hub at PROJECT_ROOT instead of HERMES_HOME
- doctor.py redundant logs/ check (already covered by subdirectory loop)
- mini-swe-agent loading config from platformdirs default instead of ~/.hermes/

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 16:49:14 +11:00
+								        load_dotenv(dotenv_path=_user_env, encoding="utf-8")
-												More fixes for windoze

											
										
										
											2026-02-25 15:20:42 -08:00
+								    except UnicodeDecodeError:
-												fix: resolve .env and config paths from HERMES_HOME, not PROJECT_ROOT

The `hermes` CLI entry point (hermes_cli/main.py) and the agent runner
(run_agent.py) only loaded .env from the project installation directory.
After the standard installer, code lives at ~/.hermes/hermes-agent/ but
config lives at ~/.hermes/ — so the .env was never found.

Aligns these entry points with the pattern already used by gateway/run.py
and rl_cli.py: load ~/.hermes/.env first, fall back to project root .env
for dev-mode compatibility.

Also fixes:
- status.py checking .env existence and API keys at PROJECT_ROOT
- doctor.py KeyError on tool availability (missing_vars vs env_vars)
- doctor.py checking logs/ and Skills Hub at PROJECT_ROOT instead of HERMES_HOME
- doctor.py redundant logs/ check (already covered by subdirectory loop)
- mini-swe-agent loading config from platformdirs default instead of ~/.hermes/

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 16:49:14 +11:00
+								        load_dotenv(dotenv_path=_user_env, encoding="latin-1")
 								    logger.info("Loaded environment variables from %s", _user_env)
 								elif _project_env.exists():
 								    try:
 								        load_dotenv(dotenv_path=_project_env, encoding="utf-8")
 								    except UnicodeDecodeError:
 								        load_dotenv(dotenv_path=_project_env, encoding="latin-1")
 								    logger.info("Loaded environment variables from %s", _project_env)
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								else:
-												fix: resolve .env and config paths from HERMES_HOME, not PROJECT_ROOT

The `hermes` CLI entry point (hermes_cli/main.py) and the agent runner
(run_agent.py) only loaded .env from the project installation directory.
After the standard installer, code lives at ~/.hermes/hermes-agent/ but
config lives at ~/.hermes/ — so the .env was never found.

Aligns these entry points with the pattern already used by gateway/run.py
and rl_cli.py: load ~/.hermes/.env first, fall back to project root .env
for dev-mode compatibility.

Also fixes:
- status.py checking .env existence and API keys at PROJECT_ROOT
- doctor.py KeyError on tool availability (missing_vars vs env_vars)
- doctor.py checking logs/ and Skills Hub at PROJECT_ROOT instead of HERMES_HOME
- doctor.py redundant logs/ check (already covered by subdirectory loop)
- mini-swe-agent loading config from platformdirs default instead of ~/.hermes/

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-26 16:49:14 +11:00
+								    logger.info("No .env file found. Using system environment variables.")
 								# Point mini-swe-agent at ~/.hermes/ so it shares our config
 								os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(_hermes_home))
 								os.environ.setdefault("MSWEA_SILENT_STARTUP", "1")
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
 								# Import our tool system
 								from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements
-												feat: enhance interrupt handling and container resource configuration

- Introduced a shared interrupt signaling mechanism to allow tools to check for user interrupts during long-running operations.
- Updated the AIAgent to handle interrupts more effectively, ensuring in-progress tool calls are canceled and multiple interrupt messages are combined into one prompt.
- Enhanced the CLI configuration to include container resource limits (CPU, memory, disk) and persistence options for Docker, Singularity, and Modal environments.
- Improved documentation to clarify interrupt behaviors and container resource settings, providing users with better guidance on configuration and usage.

											
										
										
											2026-02-23 02:11:33 -08:00
+								from tools.terminal_tool import cleanup_vm
 								from tools.interrupt import set_interrupt as _set_interrupt
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								from tools.browser_tool import cleanup_browser
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								import requests
-												Cleanup time!

											
										
										
											2026-02-20 23:23:32 -08:00
+								from hermes_constants import OPENROUTER_BASE_URL, OPENROUTER_MODELS_URL
-												Refactor Terminal and AIAgent cleanup

											
										
										
											2026-02-21 22:31:43 -08:00
+								# Agent internals extracted to agent/ package for modularity
-												Enhance agent guidance with memory and session search tools

- Introduced MEMORY_GUIDANCE and SESSION_SEARCH_GUIDANCE to improve agent's contextual awareness and proactive assistance.
- Updated AIAgent to conditionally include tool-aware guidance in prompts based on available tools.
- Enhanced descriptions in memory and session search schemas for clearer user instructions on when to utilize these features.

											
										
										
											2026-02-22 02:31:52 -08:00
+								from agent.prompt_builder import (
 								    DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
-												feat: introduce skills management features in AIAgent and CLI

- Added skills configuration options in cli-config.yaml.example, including a nudge interval for skill creation reminders.
- Implemented skills guidance in AIAgent to prompt users to save reusable workflows after complex tasks.
- Enhanced skills indexing in the prompt builder to include descriptions from SKILL.md files for better context.
- Updated the agent's behavior to periodically remind users about potential skills during tool-calling iterations.

											
										
										
											2026-02-22 13:28:13 -08:00
+								    MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
-												Enhance agent guidance with memory and session search tools

- Introduced MEMORY_GUIDANCE and SESSION_SEARCH_GUIDANCE to improve agent's contextual awareness and proactive assistance.
- Updated AIAgent to conditionally include tool-aware guidance in prompts based on available tools.
- Enhanced descriptions in memory and session search schemas for clearer user instructions on when to utilize these features.

											
										
										
											2026-02-22 02:31:52 -08:00
+								)
-												Refactor Terminal and AIAgent cleanup

											
										
										
											2026-02-21 22:31:43 -08:00
+								from agent.model_metadata import (
 								    fetch_model_metadata, get_model_context_length,
 								    estimate_tokens_rough, estimate_messages_tokens_rough,
-												feat: smart context length probing with persistent caching + banner display

Replaces the unsafe 128K fallback for unknown models with a descending
probe strategy (2M → 1M → 512K → 200K → 128K → 64K → 32K). When a
context-length error occurs, the agent steps down tiers and retries.
The discovered limit is cached per model+provider combo in
~/.hermes/context_length_cache.yaml so subsequent sessions skip probing.

Also parses API error messages to extract the actual context limit
(e.g. 'maximum context length is 32768 tokens') for instant resolution.

The CLI banner now displays the context window size next to the model
name (e.g. 'claude-opus-4 · 200K context · Nous Research').

Changes:
- agent/model_metadata.py: CONTEXT_PROBE_TIERS, persistent cache
  (save/load/get), parse_context_limit_from_error(), get_next_probe_tier()
- agent/context_compressor.py: accepts base_url, passes to metadata
- run_agent.py: step-down logic in context error handler, caches on success
- cli.py + hermes_cli/banner.py: context length in welcome banner
- tests: 22 new tests for probing, parsing, and caching

Addresses #132. PR #319's approach (8K default) rejected — too conservative.

											
										
										
											2026-03-05 16:09:57 -08:00
+								    get_next_probe_tier, parse_context_limit_from_error,
 								    save_context_length,
-												Refactor Terminal and AIAgent cleanup

											
										
										
											2026-02-21 22:31:43 -08:00
+								)
 								from agent.context_compressor import ContextCompressor
 								from agent.prompt_caching import apply_anthropic_cache_control
 								from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt
 								from agent.display import (
 								    KawaiiSpinner, build_tool_preview as _build_tool_preview,
 								    get_cute_tool_message as _get_cute_tool_message_impl,
-												feat(logging): implement persistent error logging for tool failures

- Introduce a separate error log for capturing warnings and errors related to tool execution, ensuring detailed inspection of issues post-failure.
- Enhance error handling in the AIAgent class to log exceptions with stack traces for better debugging.
- Add a similar error logging mechanism in the gateway to streamline debugging processes.

											
										
										
											2026-02-28 22:49:58 -08:00
+								    _detect_tool_failure,
-												Refactor Terminal and AIAgent cleanup

											
										
										
											2026-02-21 22:31:43 -08:00
+								)
 								from agent.trajectory import (
 								    convert_scratchpad_to_think, has_incomplete_scratchpad,
 								    save_trajectory as _save_trajectory_to_file,
-												Add platform-specific formatting hints and identity for AIAgent

- Introduced a default agent identity prompt to ensure consistent behavior across platforms.
- Added platform-specific formatting hints for CLI, WhatsApp, Telegram, and Discord to guide the agent's output style.
- Updated the AIAgent initialization to accept a platform parameter, enhancing adaptability to different interfaces.

											
										
										
											2026-02-12 16:11:16 -08:00
+								)
-												Add a Claude Code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
-												feat: shared iteration budget across parent + subagents

Subagent tool calls now count toward the same session-wide iteration
limit as the parent agent. Previously, each subagent had its own
independent counter, so a parent with max_iterations=60 could spawn
3 subagents each doing 50 calls = 150 total tool calls unmetered.

Changes:
- IterationBudget: thread-safe shared counter (run_agent.py)
  - consume(): try to use one iteration, returns False if exhausted
  - refund(): give back one iteration (for execute_code turns)
  - Thread-safe via Lock (subagents run in ThreadPoolExecutor)
- Parent creates the budget, children inherit it via delegate_tool.py
- execute_code turns are refunded (don't count against budget)
- Default raised from 60 → 90 to account for shared consumption
- Per-child cap (50) still applies as a safety valve

The per-child max_iterations (default 50) remains as a per-child
ceiling, but the shared budget is the hard session-wide limit.
A child stops at whichever comes first.

											
										
										
											2026-03-07 08:16:37 -08:00
class IterationBudget:
    """Thread-safe shared iteration counter for parent and child agents.

    Tracks total LLM-call iterations consumed across a parent agent and all
    its subagents.  A single ``IterationBudget`` is created by the parent
    and passed to every child so they share the same cap.

    ``execute_code`` (programmatic tool calling) iterations are refunded via
    :meth:`refund` so they don't eat into the budget.

    Every read and write of the internal counter happens while holding the
    instance lock, so the values observed through :attr:`used` and
    :attr:`remaining` are always consistent with :meth:`consume` and
    :meth:`refund`.
    """

    def __init__(self, max_total: int):
        """Create a budget that allows at most ``max_total`` iterations."""
        self.max_total = max_total
        self._used = 0
        self._lock = threading.Lock()

    def consume(self) -> bool:
        """Try to consume one iteration.

        Returns:
            True if an iteration was available and has been counted,
            False if the budget is already exhausted (the counter is left
            unchanged in that case).
        """
        with self._lock:
            if self._used >= self.max_total:
                return False
            self._used += 1
            return True

    def refund(self) -> None:
        """Give back one iteration (e.g. for execute_code turns).

        The counter never drops below zero: refunding an untouched budget
        is a no-op.
        """
        with self._lock:
            if self._used > 0:
                self._used -= 1

    @property
    def used(self) -> int:
        """Number of iterations consumed so far (net of refunds)."""
        # Read under the lock, like ``remaining``, so both properties give
        # the same consistency guarantee.
        with self._lock:
            return self._used

    @property
    def remaining(self) -> int:
        """Number of iterations still available, clamped at zero."""
        with self._lock:
            return max(0, self.max_total - self._used)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								class AIAgent:
 								    """
 								    AI Agent with tool calling capabilities.
 								    This class manages the conversation flow, tool execution, and response handling
 								    for AI models that support function calling.
 								    """
 								    def __init__(
-												fix leakage

											
										
										
											2025-11-03 17:42:23 -05:00
+								        self,
 								        base_url: str = None,
 								        api_key: str = None,
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								        provider: str = None,
 								        api_mode: str = None,
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								        model: str = "anthropic/claude-opus-4.6",  # OpenRouter format
-												feat: shared iteration budget across parent + subagents

Subagent tool calls now count toward the same session-wide iteration
limit as the parent agent. Previously, each subagent had its own
independent counter, so a parent with max_iterations=60 could spawn
3 subagents each doing 50 calls = 150 total tool calls unmetered.

Changes:
- IterationBudget: thread-safe shared counter (run_agent.py)
  - consume(): try to use one iteration, returns False if exhausted
  - refund(): give back one iteration (for execute_code turns)
  - Thread-safe via Lock (subagents run in ThreadPoolExecutor)
- Parent creates the budget, children inherit it via delegate_tool.py
- execute_code turns are refunded (don't count against budget)
- Default raised from 60 → 90 to account for shared consumption
- Per-child cap (50) still applies as a safety valve

The per-child max_iterations (default 50) remains as a per-child
ceiling, but the shared budget is the hard session-wide limit.
A child stops at whichever comes first.

											
										
										
											2026-03-07 08:16:37 -08:00
+								        max_iterations: int = 90,  # Default tool-calling iterations (shared with subagents)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        tool_delay: float = 1.0,
 								        enabled_toolsets: List[str] = None,
 								        disabled_toolsets: List[str] = None,
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								        save_trajectories: bool = False,
-												Add ephemeral system prompt support in batch and agent runners. Update README with usage examples and documentation for the new feature. Ensure prompt is not saved to trajectories.

											
										
										
											2025-10-08 02:33:58 +00:00
+								        verbose_logging: bool = False,
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								        quiet_mode: bool = False,
-												add logging of prefix of tool call and tool response

											
										
										
											2025-11-07 14:43:44 -05:00
+								        ephemeral_system_prompt: str = None,
 								        log_prefix_chars: int = 100,
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
+								        log_prefix: str = "",
 								        providers_allowed: List[str] = None,
 								        providers_ignored: List[str] = None,
 								        providers_order: List[str] = None,
 								        provider_sort: str = None,
-												feat(provider-routing): add OpenRouter provider routing configuration

Introduced a new `provider_routing` section in the CLI configuration to control how requests are routed across providers when using OpenRouter. This includes options for sorting providers by throughput, latency, or price, as well as allowing or ignoring specific providers, setting the order of provider attempts, and managing data collection policies. Updated relevant classes and documentation to support these features, enhancing flexibility in provider selection.

											
										
										
											2026-03-01 18:24:27 -08:00
+								        provider_require_parameters: bool = False,
 								        provider_data_collection: str = None,
-												Enhance session logging and interactive sudo support

- Implemented automatic session logging, saving conversation trajectories to the `logs/` directory in JSON format, with each session having a unique identifier.
- Updated the CLI to display the session ID in the welcome banner for easy reference.
- Introduced an interactive sudo password prompt in CLI mode, allowing users to enter their password with a 45-second timeout, enhancing user experience during command execution.
- Documented session logging and interactive sudo features in `README.md`, `cli.md`, and `cli-config.yaml.example` for better user guidance.

											
										
										
											2026-02-01 15:36:26 -08:00
+								        session_id: str = None,
-												Add tool progress notifications for messaging channels

- Introduced a new callback mechanism in the AIAgent class to send tool progress messages during execution, enhancing user feedback in messaging platforms.
- Updated the GatewayRunner to support tool progress notifications, allowing users to enable or disable this feature via environment variables.
- Enhanced the CLI setup wizard to prompt users for enabling tool progress messages and selecting the notification mode (all or new), improving configuration options.
- Updated relevant documentation to reflect the new features and configuration settings for tool progress notifications.

											
										
										
											2026-02-03 14:54:43 -08:00
+								        tool_progress_callback: callable = None,
-												fix: stop terminal border flashing with steady cursor and TUI spinner widget

Cherry-picked and improved from PR #470 (fixes #464).

Problem: On Ubuntu 24.04 with ghostty + tmux, the prompt input box
border lines flash due to cursor blink and raw spinner terminal writes
conflicting with prompt_toolkit's rendering.

Changes:
- cli.py: Add CursorShape.BLOCK to Application() to disable cursor blink
- cli.py: Add thinking_callback + spinner_widget in TUI layout so
  thinking status displays as a proper prompt_toolkit widget instead of
  raw terminal writes that conflict with the TUI renderer
- run_agent.py: Add thinking_callback parameter to AIAgent; when set,
  uses the callback instead of KawaiiSpinner for thinking display

What was NOT changed (preserving existing behavior):
- agent/display.py: Untouched. KawaiiSpinner _write() stdout capture,
  _animate() logic, and 0.12s frame interval all preserved. This
  protects subagent stdout redirection and keeps smooth animations
  for non-CLI contexts (gateway, batch runner).
- Original emoji spinner types (brain/sparkle/pulse/moon/star) preserved
  for all non-CLI contexts.

Fixes from original PR #470:
- CursorShape.STEADY_BLOCK -> CursorShape.BLOCK (STEADY_BLOCK doesn't
  exist in prompt_toolkit 3.0.52)
- Removed duplicate self._spinner_text = '' line
- Removed redundant nested if-checks

Tested: 2706 tests pass, interactive CLI verified via tmux.

											
										
										
											2026-03-09 23:26:43 -07:00
+								        thinking_callback: callable = None,
-												feat(cli): add /reasoning command for effort level and display toggle

Combined implementation of reasoning management:
- /reasoning              Show current effort level and display state
- /reasoning <level>      Set reasoning effort (none, low, medium, high, xhigh)
- /reasoning show|on      Show model thinking/reasoning in output
- /reasoning hide|off     Hide model thinking/reasoning from output

Effort level changes persist to config and force agent re-init.
Display toggle updates the agent callback dynamically without re-init.

When display is enabled:
- Intermediate reasoning shown as dim [thinking] lines during tool loops
- Final reasoning shown in a bordered box above the response
- Long reasoning collapsed (5 lines intermediate, 10 lines final)

Also adds:
- reasoning_callback parameter to AIAgent
- last_reasoning in run_conversation result dict
- show_reasoning config option (display section, default: false)
- Display section in /config output
- 34 tests covering both features

Combines functionality from PR #789 and PR #790.

Co-authored-by: Aum Desai <Aum08Desai@users.noreply.github.com>
Co-authored-by: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>

											
										
										
											2026-03-11 05:53:21 -07:00
+								        reasoning_callback: callable = None,
-												feat: introduce clarifying questions tool for interactive user engagement

- Added a new `clarify_tool` to enable the agent to ask structured multiple-choice or open-ended questions to users.
- Implemented callback functionality for user interaction, allowing the platform to handle UI presentation.
- Updated the CLI and agent to support clarify questions, including timeout handling and response management.
- Enhanced toolset definitions and requirements to include the clarify tool, ensuring availability across platforms.

											
										
										
											2026-02-19 20:06:14 -08:00
+								        clarify_callback: callable = None,
-												feat(hooks): introduce event hooks system for lifecycle management

Add a new hooks system allowing users to run custom code at key lifecycle points in the agent's operation. This includes support for events such as `gateway:startup`, `session:start`, `agent:step`, and more. Documentation for creating hooks and available events has been added to `README.md` and a new `hooks.md` file. Additionally, integrate step callbacks in the agent to facilitate hook execution during tool-calling iterations.

											
										
										
											2026-02-28 17:09:26 -08:00
+								        step_callback: callable = None,
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								        max_tokens: int = None,
 								        reasoning_config: Dict[str, Any] = None,
 								        prefill_messages: List[Dict[str, Any]] = None,
-												Add platform-specific formatting hints and identity for AIAgent

- Introduced a default agent identity prompt to ensure consistent behavior across platforms.
- Added platform-specific formatting hints for CLI, WhatsApp, Telegram, and Discord to guide the agent's output style.
- Updated the AIAgent initialization to accept a platform parameter, enhancing adaptability to different interfaces.

											
										
										
											2026-02-12 16:11:16 -08:00
+								        platform: str = None,
-												Add skip_context_files option to AIAgent for batch processing

- Introduced a new parameter `skip_context_files` in the AIAgent class to control the inclusion of context files (SOUL.md, AGENTS.md, .cursorrules) in the system prompt.
- Updated the _process_single_prompt function to set `skip_context_files` to True, preventing pollution of trajectories during batch processing and data generation.

											
										
										
											2026-02-16 22:40:31 -08:00
+								        skip_context_files: bool = False,
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        skip_memory: bool = False,
 								        session_db=None,
-												feat: add Honcho AI-native memory integration

Opt-in persistent cross-session user modeling via Honcho. Reads
~/.honcho/config.json as single source of truth (shared with
Claude Code, Cursor, and other Honcho-enabled tools). Zero impact
when disabled or unconfigured.

- honcho_integration/ package (client, session manager, peer resolution)
- Host-based config resolution matching claude-honcho/cursor-honcho pattern
- Prefetch user context into system prompt per conversation turn
- Sync user/assistant messages to Honcho after each exchange
- query_user_context tool for mid-conversation dialectic reasoning
- Gated activation: requires ~/.honcho/config.json with enabled=true

											
										
										
											2026-02-25 19:34:25 -05:00
+								        honcho_session_key: str = None,
-												feat: shared iteration budget across parent + subagents

Subagent tool calls now count toward the same session-wide iteration
limit as the parent agent. Previously, each subagent had its own
independent counter, so a parent with max_iterations=60 could spawn
3 subagents each doing 50 calls = 150 total tool calls unmetered.

Changes:
- IterationBudget: thread-safe shared counter (run_agent.py)
  - consume(): try to use one iteration, returns False if exhausted
  - refund(): give back one iteration (for execute_code turns)
  - Thread-safe via Lock (subagents run in ThreadPoolExecutor)
- Parent creates the budget, children inherit it via delegate_tool.py
- execute_code turns are refunded (don't count against budget)
- Default raised from 60 → 90 to account for shared consumption
- Per-child cap (50) still applies as a safety valve

The per-child max_iterations (default 50) remains as a per-child
ceiling, but the shared budget is the hard session-wide limit.
A child stops at whichever comes first.

											
										
										
											2026-03-07 08:16:37 -08:00
+								        iteration_budget: "IterationBudget" = None,
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								        fallback_model: Dict[str, Any] = None,
-												feat: filesystem checkpoints and /rollback command

Automatic filesystem snapshots before destructive file operations,
with user-facing rollback.  Inspired by PR #559 (by @alireza78a).

Architecture:
- Shadow git repos at ~/.hermes/checkpoints/{hash}/ via GIT_DIR
- CheckpointManager: take/list/restore, turn-scoped dedup, pruning
- Transparent — the LLM never sees it, no tool schema, no tokens
- Once per turn — only first write_file/patch triggers a snapshot

Integration:
- Config: checkpoints.enabled + checkpoints.max_snapshots
- CLI flag: hermes --checkpoints
- Trigger: run_agent.py _execute_tool_calls() before write_file/patch
- /rollback slash command in CLI + gateway (list, restore by number)
- Pre-rollback snapshot auto-created on restore (undo the undo)

Safety:
- Never blocks file operations — all errors silently logged
- Skips root dir, home dir, dirs >50K files
- Disables gracefully when git not installed
- Shadow repo completely isolated from project git

Tests: 35 new tests, all passing (2798 total suite)
Docs: feature page, config reference, CLI commands reference

											
										
										
											2026-03-10 00:49:15 -07:00
+								        checkpoints_enabled: bool = False,
 								        checkpoint_max_snapshots: int = 50,
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								    ):
 								        """
 								        Initialize the AI Agent.
-												fix leakage

											
										
										
											2025-11-03 17:42:23 -05:00
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        Args:
 								            base_url (str): Base URL for the model API (optional)
 								            api_key (str): API key for authentication (optional, uses env var if not provided)
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								            provider (str): Provider identifier (optional; used for telemetry/routing hints)
 								            api_mode (str): API mode override: "chat_completions" or "codex_responses"
-												Cleanup time!

											
										
										
											2026-02-20 23:23:32 -08:00
+								            model (str): Model name to use (default: "anthropic/claude-opus-4.6")
-												feat: shared iteration budget across parent + subagents

Subagent tool calls now count toward the same session-wide iteration
limit as the parent agent. Previously, each subagent had its own
independent counter, so a parent with max_iterations=60 could spawn
3 subagents each doing 50 calls = 150 total tool calls unmetered.

Changes:
- IterationBudget: thread-safe shared counter (run_agent.py)
  - consume(): try to use one iteration, returns False if exhausted
  - refund(): give back one iteration (for execute_code turns)
  - Thread-safe via Lock (subagents run in ThreadPoolExecutor)
- Parent creates the budget, children inherit it via delegate_tool.py
- execute_code turns are refunded (don't count against budget)
- Default raised from 60 → 90 to account for shared consumption
- Per-child cap (50) still applies as a safety valve

The per-child max_iterations (default 50) remains as a per-child
ceiling, but the shared budget is the hard session-wide limit.
A child stops at whichever comes first.

											
										
										
											2026-03-07 08:16:37 -08:00
+								            max_iterations (int): Maximum number of tool calling iterations (default: 90)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								            tool_delay (float): Delay between tool calls in seconds (default: 1.0)
 								            enabled_toolsets (List[str]): Only enable tools from these toolsets (optional)
 								            disabled_toolsets (List[str]): Disable tools from these toolsets (optional)
 								            save_trajectories (bool): Whether to save conversation trajectories to JSONL files (default: False)
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								            verbose_logging (bool): Enable verbose logging for debugging (default: False)
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								            quiet_mode (bool): Suppress progress output for clean CLI experience (default: False)
-												Add ephemeral system prompt support in batch and agent runners. Update README with usage examples and documentation for the new feature. Ensure prompt is not saved to trajectories.

											
										
										
											2025-10-08 02:33:58 +00:00
+								            ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
-												Cleanup time!

											
										
										
											2026-02-20 23:23:32 -08:00
+								            log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses (default: 100)
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
+								            log_prefix (str): Prefix to add to all log messages for identification in parallel processing (default: "")
 								            providers_allowed (List[str]): OpenRouter providers to allow (optional)
 								            providers_ignored (List[str]): OpenRouter providers to ignore (optional)
 								            providers_order (List[str]): OpenRouter providers to try in order (optional)
 								            provider_sort (str): Sort providers by price/throughput/latency (optional)
-												Enhance session logging and interactive sudo support

- Implemented automatic session logging, saving conversation trajectories to the `logs/` directory in JSON format, with each session having a unique identifier.
- Updated the CLI to display the session ID in the welcome banner for easy reference.
- Introduced an interactive sudo password prompt in CLI mode, allowing users to enter their password with a 45-second timeout, enhancing user experience during command execution.
- Documented session logging and interactive sudo features in `README.md`, `cli.md`, and `cli-config.yaml.example` for better user guidance.

											
										
										
											2026-02-01 15:36:26 -08:00
+								            session_id (str): Pre-generated session ID for logging (optional, auto-generated if not provided)
-												Add tool progress notifications for messaging channels

- Introduced a new callback mechanism in the AIAgent class to send tool progress messages during execution, enhancing user feedback in messaging platforms.
- Updated the GatewayRunner to support tool progress notifications, allowing users to enable or disable this feature via environment variables.
- Enhanced the CLI setup wizard to prompt users for enabling tool progress messages and selecting the notification mode (all or new), improving configuration options.
- Updated relevant documentation to reflect the new features and configuration settings for tool progress notifications.

											
										
										
											2026-02-03 14:54:43 -08:00
+								            tool_progress_callback (callable): Callback function(tool_name, args_preview) for progress notifications
-												feat: introduce clarifying questions tool for interactive user engagement

- Added a new `clarify_tool` to enable the agent to ask structured multiple-choice or open-ended questions to users.
- Implemented callback functionality for user interaction, allowing the platform to handle UI presentation.
- Updated the CLI and agent to support clarify questions, including timeout handling and response management.
- Enhanced toolset definitions and requirements to include the clarify tool, ensuring availability across platforms.

											
										
										
											2026-02-19 20:06:14 -08:00
+								            clarify_callback (callable): Callback function(question, choices) -> str for interactive user questions.
 								                Provided by the platform layer (CLI or gateway). If None, the clarify tool returns an error.
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								            max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
 								            reasoning_config (Dict): OpenRouter reasoning configuration override (e.g. {"effort": "none"} to disable thinking).
-												feat: default reasoning effort from xhigh to medium

Reduces token usage and latency for most tasks by defaulting to
medium reasoning effort instead of xhigh. Users can still override
via config or CLI flag. Updates code, tests, example config, and docs.

											
										
										
											2026-03-07 10:14:19 -08:00
+								                If None, defaults to {"enabled": True, "effort": "medium"} for OpenRouter. Set to disable/customize reasoning.
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								            prefill_messages (List[Dict]): Messages to prepend to conversation history as prefilled context.
 								                Useful for injecting a few-shot example or priming the model's response style.
 								                Example: [{"role": "user", "content": "Hi!"}, {"role": "assistant", "content": "Hello!"}]
-												Add platform-specific formatting hints and identity for AIAgent

- Introduced a default agent identity prompt to ensure consistent behavior across platforms.
- Added platform-specific formatting hints for CLI, WhatsApp, Telegram, and Discord to guide the agent's output style.
- Updated the AIAgent initialization to accept a platform parameter, enhancing adaptability to different interfaces.

											
										
										
											2026-02-12 16:11:16 -08:00
+								            platform (str): The interface platform the user is on (e.g. "cli", "telegram", "discord", "whatsapp").
 								                Used to inject platform-specific formatting hints into the system prompt.
-												Add skip_context_files option to AIAgent for batch processing

- Introduced a new parameter `skip_context_files` in the AIAgent class to control the inclusion of context files (SOUL.md, AGENTS.md, .cursorrules) in the system prompt.
- Updated the _process_single_prompt function to set `skip_context_files` to True, preventing pollution of trajectories during batch processing and data generation.

											
										
										
											2026-02-16 22:40:31 -08:00
+								            skip_context_files (bool): If True, skip auto-injection of SOUL.md, AGENTS.md, and .cursorrules
 								                into the system prompt. Use this for batch processing and data generation to avoid
 								                polluting trajectories with user-specific persona or project instructions.
-												feat: add Honcho AI-native memory integration

Opt-in persistent cross-session user modeling via Honcho. Reads
~/.honcho/config.json as the single source of truth (shared with
Claude Code, Cursor, and other Honcho-enabled tools). Zero impact
when disabled or unconfigured.

- honcho_integration/ package (client, session manager, peer resolution)
- Host-based config resolution matching claude-honcho/cursor-honcho pattern
- Prefetch user context into system prompt per conversation turn
- Sync user/assistant messages to Honcho after each exchange
- query_user_context tool for mid-conversation dialectic reasoning
- Gated activation: requires ~/.honcho/config.json with enabled=true

											
										
										
											2026-02-25 19:34:25 -05:00
+								            honcho_session_key (str): Session key for Honcho integration (e.g., "telegram:123456" or CLI session_id).
 								                When provided and Honcho is enabled in config, enables persistent cross-session user modeling.
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        """
 								        self.model = model
 								        self.max_iterations = max_iterations
-												feat: shared iteration budget across parent + subagents

Subagent tool calls now count toward the same session-wide iteration
limit as the parent agent. Previously, each subagent had its own
independent counter, so a parent with max_iterations=60 could spawn
3 subagents each doing 50 calls = 150 total tool calls unmetered.

Changes:
- IterationBudget: thread-safe shared counter (run_agent.py)
  - consume(): try to use one iteration, returns False if exhausted
  - refund(): give back one iteration (for execute_code turns)
  - Thread-safe via Lock (subagents run in ThreadPoolExecutor)
- Parent creates the budget, children inherit it via delegate_tool.py
- execute_code turns are refunded (don't count against budget)
- Default raised from 60 → 90 to account for shared consumption
- Per-child cap (50) still applies as a safety valve

The per-child max_iterations (default 50) remains as a per-child
ceiling, but the shared budget is the hard session-wide limit.
A child stops at whichever comes first.

											
										
										
											2026-03-07 08:16:37 -08:00
+								        # Shared iteration budget — parent creates, children inherit.
 								        # Consumed by every LLM turn across parent + all subagents.
 								        self.iteration_budget = iteration_budget or IterationBudget(max_iterations)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        self.tool_delay = tool_delay
 								        self.save_trajectories = save_trajectories
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								        self.verbose_logging = verbose_logging
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								        self.quiet_mode = quiet_mode
-												Add ephemeral system prompt support in batch and agent runners. Update README with usage examples and documentation for the new feature. Ensure prompt is not saved to trajectories.

											
										
										
											2025-10-08 02:33:58 +00:00
+								        self.ephemeral_system_prompt = ephemeral_system_prompt
-												Add platform-specific formatting hints and identity for AIAgent

- Introduced a default agent identity prompt to ensure consistent behavior across platforms.
- Added platform-specific formatting hints for CLI, WhatsApp, Telegram, and Discord to guide the agent's output style.
- Updated the AIAgent initialization to accept a platform parameter, enhancing adaptability to different interfaces.

											
										
										
											2026-02-12 16:11:16 -08:00
+								        self.platform = platform  # "cli", "telegram", "discord", "whatsapp", etc.
-												Add skip_context_files option to AIAgent for batch processing

- Introduced a new parameter `skip_context_files` in the AIAgent class to control the inclusion of context files (SOUL.md, AGENTS.md, .cursorrules) in the system prompt.
- Updated the _process_single_prompt function to set `skip_context_files` to True, preventing pollution of trajectories during batch processing and data generation.

											
										
										
											2026-02-16 22:40:31 -08:00
+								        self.skip_context_files = skip_context_files
-												add logging of prefix of tool call and tool response

											
										
										
											2025-11-07 14:43:44 -05:00
+								        self.log_prefix_chars = log_prefix_chars
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
+								        self.log_prefix = f"{log_prefix} " if log_prefix else ""
-												Implement Anthropic prompt caching for Claude models via OpenRouter

- Introduced a caching strategy that reduces input token costs by ~75% on multi-turn conversations by caching the conversation prefix.
- Added functions to apply cache control markers to messages, enhancing efficiency in token usage.
- Updated AIAgent to auto-enable prompt caching for Claude models, with configurable cache TTL.
- Enhanced logging to track cache hit statistics when caching is active, improving monitoring of token usage.

											
										
										
											2026-02-10 06:49:41 +00:00
+								        # Store effective base URL for feature detection (prompt caching, reasoning, etc.)
 								        # When no base_url is provided, the client defaults to OpenRouter, so reflect that here.
-												Cleanup time!

											
										
										
											2026-02-20 23:23:32 -08:00
+								        self.base_url = base_url or OPENROUTER_BASE_URL
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								        provider_name = provider.strip().lower() if isinstance(provider, str) and provider.strip() else None
 								        self.provider = provider_name or "openrouter"
 								        if api_mode in {"chat_completions", "codex_responses"}:
 								            self.api_mode = api_mode
 								        elif self.provider == "openai-codex":
 								            self.api_mode = "codex_responses"
 								        elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self.base_url.lower():
 								            self.api_mode = "codex_responses"
 								            self.provider = "openai-codex"
 								        else:
 								            self.api_mode = "chat_completions"
-												fix: allow Anthropic API URLs as custom OpenAI-compatible endpoints

Removed the hard block on base_url containing 'api.anthropic.com'.
Anthropic now offers an OpenAI-compatible /chat/completions endpoint,
so blocking their URL prevents legitimate use. If the endpoint isn't
compatible, the API call will fail with a proper error anyway.

Removed from: run_agent.py, mini_swe_runner.py
Updated test to verify Anthropic URLs are accepted.

											
										
										
											2026-03-07 23:36:35 -08:00
-												Add tool progress notifications for messaging channels

- Introduced a new callback mechanism in the AIAgent class to send tool progress messages during execution, enhancing user feedback in messaging platforms.
- Updated the GatewayRunner to support tool progress notifications, allowing users to enable or disable this feature via environment variables.
- Enhanced the CLI setup wizard to prompt users for enabling tool progress messages and selecting the notification mode (all or new), improving configuration options.
- Updated relevant documentation to reflect the new features and configuration settings for tool progress notifications.

											
										
										
											2026-02-03 14:54:43 -08:00
+								        self.tool_progress_callback = tool_progress_callback
-												fix: stop terminal border flashing with steady cursor and TUI spinner widget

Cherry-picked and improved from PR #470 (fixes #464).

Problem: On Ubuntu 24.04 with ghostty + tmux, the prompt input box
border lines flash due to cursor blink and raw spinner terminal writes
conflicting with prompt_toolkit's rendering.

Changes:
- cli.py: Add CursorShape.BLOCK to Application() to disable cursor blink
- cli.py: Add thinking_callback + spinner_widget in TUI layout so
  thinking status displays as a proper prompt_toolkit widget instead of
  raw terminal writes that conflict with the TUI renderer
- run_agent.py: Add thinking_callback parameter to AIAgent; when set,
  uses the callback instead of KawaiiSpinner for thinking display

What was NOT changed (preserving existing behavior):
- agent/display.py: Untouched. KawaiiSpinner _write() stdout capture,
  _animate() logic, and 0.12s frame interval all preserved. This
  protects subagent stdout redirection and keeps smooth animations
  for non-CLI contexts (gateway, batch runner).
- Original emoji spinner types (brain/sparkle/pulse/moon/star) preserved
  for all non-CLI contexts.

Fixes from original PR #470:
- CursorShape.STEADY_BLOCK -> CursorShape.BLOCK (STEADY_BLOCK doesn't
  exist in prompt_toolkit 3.0.52)
- Removed duplicate self._spinner_text = '' line
- Removed redundant nested if-checks

Tested: 2706 tests pass, interactive CLI verified via tmux.

											
										
										
											2026-03-09 23:26:43 -07:00
+								        self.thinking_callback = thinking_callback
-												feat(cli): add /reasoning command for effort level and display toggle

Combined implementation of reasoning management:
- /reasoning              Show current effort level and display state
- /reasoning <level>      Set reasoning effort (none, low, medium, high, xhigh)
- /reasoning show|on      Show model thinking/reasoning in output
- /reasoning hide|off     Hide model thinking/reasoning from output

Effort level changes persist to config and force agent re-init.
Display toggle updates the agent callback dynamically without re-init.

When display is enabled:
- Intermediate reasoning shown as dim [thinking] lines during tool loops
- Final reasoning shown in a bordered box above the response
- Long reasoning collapsed (5 lines intermediate, 10 lines final)

Also adds:
- reasoning_callback parameter to AIAgent
- last_reasoning in run_conversation result dict
- show_reasoning config option (display section, default: false)
- Display section in /config output
- 34 tests covering both features

Combines functionality from PR #789 and PR #790.

Co-authored-by: Aum Desai <Aum08Desai@users.noreply.github.com>
Co-authored-by: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>

											
										
										
											2026-03-11 05:53:21 -07:00
+								        self.reasoning_callback = reasoning_callback
-												feat: introduce clarifying questions tool for interactive user engagement

- Added a new `clarify_tool` to enable the agent to ask structured multiple-choice or open-ended questions to users.
- Implemented callback functionality for user interaction, allowing the platform to handle UI presentation.
- Updated the CLI and agent to support clarify questions, including timeout handling and response management.
- Enhanced toolset definitions and requirements to include the clarify tool, ensuring availability across platforms.

											
										
										
											2026-02-19 20:06:14 -08:00
+								        self.clarify_callback = clarify_callback
-												feat(hooks): introduce event hooks system for lifecycle management

Add a new hooks system allowing users to run custom code at key lifecycle points in the agent's operation. This includes support for events such as `gateway:startup`, `session:start`, `agent:step`, and more. Documentation for creating hooks and available events has been added to `README.md` and a new `hooks.md` file. Additionally, integrate step callbacks in the agent to facilitate hook execution during tool-calling iterations.

											
										
										
											2026-02-28 17:09:26 -08:00
+								        self.step_callback = step_callback
-												Add tool progress notifications for messaging channels

- Introduced a new callback mechanism in the AIAgent class to send tool progress messages during execution, enhancing user feedback in messaging platforms.
- Updated the GatewayRunner to support tool progress notifications, allowing users to enable or disable this feature via environment variables.
- Enhanced the CLI setup wizard to prompt users for enabling tool progress messages and selecting the notification mode (all or new), improving configuration options.
- Updated relevant documentation to reflect the new features and configuration settings for tool progress notifications.

											
										
										
											2026-02-03 14:54:43 -08:00
+								        self._last_reported_tool = None  # Track for "new tool" mode
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								        # Interrupt mechanism for breaking out of tool loops
 								        self._interrupt_requested = False
 								        self._interrupt_message = None  # Optional message that triggered interrupt
-												feat: implement subagent delegation for task management

- Introduced the `delegate_task` tool, allowing the main agent to spawn child AIAgent instances with isolated context for complex tasks.
- Supported both single-task and batch processing (up to 3 concurrent tasks) to enhance task management capabilities.
- Updated configuration options for delegation, including maximum iterations and default toolsets for subagents.
- Enhanced documentation to provide clear guidance on using the delegation feature and its configuration.
- Added comprehensive tests to ensure the functionality and reliability of the delegation logic.

											
										
										
											2026-02-20 03:15:53 -08:00
+								        # Subagent delegation state
 								        self._delegate_depth = 0        # 0 = top-level agent, incremented for children
 								        self._active_children = []      # Running child AIAgents (for interrupt propagation)
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
+								        # Store OpenRouter provider preferences
 								        self.providers_allowed = providers_allowed
 								        self.providers_ignored = providers_ignored
 								        self.providers_order = providers_order
 								        self.provider_sort = provider_sort
-												feat(provider-routing): add OpenRouter provider routing configuration

Introduced a new `provider_routing` section in the CLI configuration to control how requests are routed across providers when using OpenRouter. This includes options for sorting providers by throughput, latency, or price, as well as allowing or ignoring specific providers, setting the order of provider attempts, and managing data collection policies. Updated relevant classes and documentation to support these features, enhancing flexibility in provider selection.

											
										
										
											2026-03-01 18:24:27 -08:00
+								        self.provider_require_parameters = provider_require_parameters
 								        self.provider_data_collection = provider_data_collection
-												fix leakage

											
										
										
											2025-11-03 17:42:23 -05:00
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        # Store toolset filtering options
 								        self.enabled_toolsets = enabled_toolsets
 								        self.disabled_toolsets = disabled_toolsets
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								        # Model response configuration
 								        self.max_tokens = max_tokens  # None = use model default
-												feat: default reasoning effort from xhigh to medium

Reduces token usage and latency for most tasks by defaulting to
medium reasoning effort instead of xhigh. Users can still override
via config or CLI flag. Updates code, tests, example config, and docs.

											
										
										
											2026-03-07 10:14:19 -08:00
+								        self.reasoning_config = reasoning_config  # None = use default (medium for OpenRouter)
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								        self.prefill_messages = prefill_messages or []  # Prefilled conversation turns
-												Implement Anthropic prompt caching for Claude models via OpenRouter

- Introduced a caching strategy that reduces input token costs by ~75% on multi-turn conversations by caching the conversation prefix.
- Added functions to apply cache control markers to messages, enhancing efficiency in token usage.
- Updated AIAgent to auto-enable prompt caching for Claude models, with configurable cache TTL.
- Enhanced logging to track cache hit statistics when caching is active, improving monitoring of token usage.

											
										
										
											2026-02-10 06:49:41 +00:00
+								        # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter.
 								        # Reduces input costs by ~75% on multi-turn conversations by caching the
 								        # conversation prefix. Uses system_and_3 strategy (4 breakpoints).
 								        is_openrouter = "openrouter" in self.base_url.lower()
 								        is_claude = "claude" in self.model.lower()
 								        self._use_prompt_caching = is_openrouter and is_claude
 								        self._cache_ttl = "5m"  # Default 5-minute TTL (1.25x write cost)
-												feat: iteration budget pressure via tool result injection

Two-tier warning system that nudges the LLM as it approaches
max_iterations, injected into the last tool result JSON rather
than as a separate system message:

- Caution (70%): {"_budget_warning": "[BUDGET: 42/60...]"}
- Warning (90%): {"_budget_warning": "[BUDGET WARNING: 54/60...]"}

For JSON tool results, adds a _budget_warning field to the existing
dict. For plain text results, appends the warning as text.

Key properties:
- No system messages injected mid-conversation
- No changes to message structure
- Prompt cache stays valid
- Configurable thresholds (0.7 / 0.9)
- Can be disabled: _budget_pressure_enabled = False

Inspired by PR #421 (@Bartok9) and issue #414.
8 tests covering thresholds, edge cases, JSON and text injection.

											
										
										
											2026-03-09 05:19:42 -07:00
+								        # Iteration budget pressure: warn the LLM as it approaches max_iterations.
 								        # Warnings are injected into the last tool result JSON (not as separate
 								        # messages) so they don't break message structure or invalidate caching.
 								        self._budget_caution_threshold = 0.7   # 70% — nudge to start wrapping up
 								        self._budget_warning_threshold = 0.9   # 90% — urgent, respond now
 								        self._budget_pressure_enabled = True
-												feat(logging): implement persistent error logging for tool failures

- Introduce a separate error log that captures warnings and errors from tool execution, so issues can be inspected in detail after a failure.
- Enhance error handling in the AIAgent class to log exceptions with stack traces for better debugging.
- Add a similar error logging mechanism in the gateway to streamline debugging processes.

											
										
										
											2026-02-28 22:49:58 -08:00
+								        # Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log
 								        # so tool failures, API errors, etc. are inspectable after the fact.
-												refactor(cli, auth): Add Codex/OpenAI OAuth Support - finalized

- Replace `hermes login` with `hermes model` for selecting providers and managing authentication.
- Update documentation and CLI commands to reflect the new provider selection process.
- Introduce a new redaction system for logging sensitive information.
- Enhance Codex model discovery by integrating API fetching and local cache.
- Adjust max turns configuration logic for better clarity and precedence.
- Improve error handling and user feedback during authentication processes.

											
										
										
											2026-02-28 21:56:05 -08:00
+								        from agent.redact import RedactingFormatter
-												feat(logging): implement persistent error logging for tool failures

- Introduce a separate error log that captures warnings and errors from tool execution, so issues can be inspected in detail after a failure.
- Enhance error handling in the AIAgent class to log exceptions with stack traces for better debugging.
- Add a similar error logging mechanism in the gateway to streamline debugging processes.

											
										
										
											2026-02-28 22:49:58 -08:00
+								        _error_log_dir = Path.home() / ".hermes" / "logs"
 								        _error_log_dir.mkdir(parents=True, exist_ok=True)
 								        _error_log_path = _error_log_dir / "errors.log"
 								        from logging.handlers import RotatingFileHandler
 								        _error_file_handler = RotatingFileHandler(
 								            _error_log_path, maxBytes=2 * 1024 * 1024, backupCount=2,
 								        )
 								        _error_file_handler.setLevel(logging.WARNING)
 								        _error_file_handler.setFormatter(RedactingFormatter(
 								            '%(asctime)s %(levelname)s %(name)s: %(message)s',
 								        ))
 								        logging.getLogger().addHandler(_error_file_handler)
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								        if self.verbose_logging:
 								            logging.basicConfig(
 								                level=logging.DEBUG,
 								                format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 								                datefmt='%H:%M:%S'
 								            )
-												refactor(cli, auth): Add Codex/OpenAI OAuth Support - finalized

- Replace `hermes login` with `hermes model` for selecting providers and managing authentication.
- Update documentation and CLI commands to reflect the new provider selection process.
- Introduce a new redaction system for logging sensitive information.
- Enhance Codex model discovery by integrating API fetching and local cache.
- Adjust max turns configuration logic for better clarity and precedence.
- Improve error handling and user feedback during authentication processes.

											
										
										
											2026-02-28 21:56:05 -08:00
+								            for handler in logging.getLogger().handlers:
 								                handler.setFormatter(RedactingFormatter(
 								                    '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 								                    datefmt='%H:%M:%S',
 								                ))
-												Enhance logging and tool initialization for improved performance

- Updated logging configuration in `run_agent.py` to suppress debug messages from additional third-party libraries, reducing noise in logs.
- Enhanced shell scripts for terminal tasks to utilize Singularity for containerized execution, including pre-build SIF image logic and improved logging.
- Refactored tool initialization in `mixture_of_agents_tool.py`, `vision_tools.py`, and `web_tools.py` to implement lazy loading of API clients, optimizing resource usage and error handling.
- Updated ephemeral system prompts in shell scripts to provide clearer guidance on task execution and resource usage.

											
										
										
											2026-01-29 19:59:59 +00:00
+								            # Keep third-party libraries at WARNING level to reduce noise
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
+								            # We have our own retry and error logging that's more informative
 								            logging.getLogger('openai').setLevel(logging.WARNING)
 								            logging.getLogger('openai._base_client').setLevel(logging.WARNING)
-												some bugfixes

											
										
										
											2025-10-15 18:07:06 +00:00
+								            logging.getLogger('httpx').setLevel(logging.WARNING)
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
+								            logging.getLogger('httpcore').setLevel(logging.WARNING)
-												Enhance logging and tool initialization for improved performance

- Updated logging configuration in `run_agent.py` to suppress debug messages from additional third-party libraries, reducing noise in logs.
- Enhanced shell scripts for terminal tasks to utilize Singularity for containerized execution, including pre-build SIF image logic and improved logging.
- Refactored tool initialization in `mixture_of_agents_tool.py`, `vision_tools.py`, and `web_tools.py` to implement lazy loading of API clients, optimizing resource usage and error handling.
- Updated ephemeral system prompts in shell scripts to provide clearer guidance on task execution and resource usage.

											
										
										
											2026-01-29 19:59:59 +00:00
+								            logging.getLogger('asyncio').setLevel(logging.WARNING)
 								            # Suppress Modal/gRPC related debug spam
 								            logging.getLogger('hpack').setLevel(logging.WARNING)
 								            logging.getLogger('hpack.hpack').setLevel(logging.WARNING)
 								            logging.getLogger('grpc').setLevel(logging.WARNING)
 								            logging.getLogger('modal').setLevel(logging.WARNING)
 								            logging.getLogger('rex-deploy').setLevel(logging.INFO)  # Keep INFO for sandbox status
-												refactor: implement structured logging across multiple modules

- Introduced logging functionality in cli.py, run_agent.py, scheduler.py, and various tool modules to replace print statements with structured logging.
- Enhanced error handling and informational messages to improve debugging and monitoring capabilities.
- Ensured consistent logging practices across the codebase, facilitating better traceability and maintenance.

											
										
										
											2026-02-21 03:11:11 -08:00
+								            logger.info("Verbose logging enabled (third-party library logs suppressed)")
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								        else:
 								            # Set logging to INFO level for important messages only
 								            logging.basicConfig(
 								                level=logging.INFO,
 								                format='%(asctime)s - %(levelname)s - %(message)s',
 								                datefmt='%H:%M:%S'
 								            )
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
+								            # Suppress noisy library logging
 								            logging.getLogger('openai').setLevel(logging.ERROR)
 								            logging.getLogger('openai._base_client').setLevel(logging.ERROR)
 								            logging.getLogger('httpx').setLevel(logging.ERROR)
 								            logging.getLogger('httpcore').setLevel(logging.ERROR)
-												feat: enhance logging in AIAgent for quiet mode

- Added functionality to suppress logging noise from specific modules when in quiet mode, improving user experience in CLI.
- Updated terminal_tool.py to change the log level for fallback directory usage from warning to debug, providing clearer context without cluttering logs.

											
										
										
											2026-02-21 12:41:05 -08:00
+								            if self.quiet_mode:
 								                # In quiet mode (CLI default), suppress all tool/infra log
 								                # noise. The TUI has its own rich display for status; logger
 								                # INFO/WARNING messages just clutter it.
 								                for quiet_logger in [
 								                    'tools',               # all tools.* (terminal, browser, web, file, etc.)
 								                    'minisweagent',         # mini-swe-agent execution backend
 								                    'run_agent',            # agent runner internals
 								                    'trajectory_compressor',
 								                    'cron',                 # scheduler (only relevant in daemon mode)
 								                    'hermes_cli',           # CLI helpers
 								                ]:
 								                    logging.getLogger(quiet_logger).setLevel(logging.ERROR)
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
-												Update environment configuration and enhance terminal tool integration

- Updated `.env.example` to include new API keys and configuration options for the mini-swe-agent backend, including support for local, Docker, and Modal environments.
- Added `.gitmodules` to include mini-swe-agent as a submodule for easier integration.
- Refactored `mini_swe_runner.py` to use the updated model format and default to OpenRouter for API calls.
- Enhanced `model_tools.py` to support the new terminal tool definitions and ensure compatibility with the mini-swe-agent backend.
- Updated `README.md` to reflect changes in setup instructions and environment variable configurations.
- Improved `terminal_tool.py` to manage execution environments and lifecycle, ensuring proper cleanup and error handling.
- Introduced `terminal_hecate.py` for executing commands on MorphCloud VMs, providing an alternative backend for terminal operations.

											
										
										
											2026-01-23 12:26:53 +00:00
+								        # Initialize OpenAI client - defaults to OpenRouter
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        client_kwargs = {}
-												Update environment configuration and enhance terminal tool integration

- Updated `.env.example` to include new API keys and configuration options for the mini-swe-agent backend, including support for local, Docker, and Modal environments.
- Added `.gitmodules` to include mini-swe-agent as a submodule for easier integration.
- Refactored `mini_swe_runner.py` to use the updated model format and default to OpenRouter for API calls.
- Enhanced `model_tools.py` to support the new terminal tool definitions and ensure compatibility with the mini-swe-agent backend.
- Updated `README.md` to reflect changes in setup instructions and environment variable configurations.
- Improved `terminal_tool.py` to manage execution environments and lifecycle, ensuring proper cleanup and error handling.
- Introduced `terminal_hecate.py` for executing commands on MorphCloud VMs, providing an alternative backend for terminal operations.

											
										
										
											2026-01-23 12:26:53 +00:00
 								        # Default to OpenRouter if no base_url provided
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        if base_url:
 								            client_kwargs["base_url"] = base_url
-												Update environment configuration and enhance terminal tool integration

- Updated `.env.example` to include new API keys and configuration options for the mini-swe-agent backend, including support for local, Docker, and Modal environments.
- Added `.gitmodules` to include mini-swe-agent as a submodule for easier integration.
- Refactored `mini_swe_runner.py` to use the updated model format and default to OpenRouter for API calls.
- Enhanced `model_tools.py` to support the new terminal tool definitions and ensure compatibility with the mini-swe-agent backend.
- Updated `README.md` to reflect changes in setup instructions and environment variable configurations.
- Improved `terminal_tool.py` to manage execution environments and lifecycle, ensuring proper cleanup and error handling.
- Introduced `terminal_hecate.py` for executing commands on MorphCloud VMs, providing an alternative backend for terminal operations.

											
										
										
											2026-01-23 12:26:53 +00:00
+								        else:
-												Cleanup time!

											
										
										
											2026-02-20 23:23:32 -08:00
+								            client_kwargs["base_url"] = OPENROUTER_BASE_URL
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
-												Update environment configuration and enhance terminal tool integration

- Updated `.env.example` to include new API keys and configuration options for the mini-swe-agent backend, including support for local, Docker, and Modal environments.
- Added `.gitmodules` to include mini-swe-agent as a submodule for easier integration.
- Refactored `mini_swe_runner.py` to use the updated model format and default to OpenRouter for API calls.
- Enhanced `model_tools.py` to support the new terminal tool definitions and ensure compatibility with the mini-swe-agent backend.
- Updated `README.md` to reflect changes in setup instructions and environment variable configurations.
- Improved `terminal_tool.py` to manage execution environments and lifecycle, ensuring proper cleanup and error handling.
- Introduced `terminal_hecate.py` for executing commands on MorphCloud VMs, providing an alternative backend for terminal operations.

											
										
										
											2026-01-23 12:26:53 +00:00
+								        # Handle API key - OpenRouter is the primary provider
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        if api_key:
 								            client_kwargs["api_key"] = api_key
 								        else:
-												Update environment configuration and enhance terminal tool integration

- Updated `.env.example` to include new API keys and configuration options for the mini-swe-agent backend, including support for local, Docker, and Modal environments.
- Added `.gitmodules` to include mini-swe-agent as a submodule for easier integration.
- Refactored `mini_swe_runner.py` to use the updated model format and default to OpenRouter for API calls.
- Enhanced `model_tools.py` to support the new terminal tool definitions and ensure compatibility with the mini-swe-agent backend.
- Updated `README.md` to reflect changes in setup instructions and environment variable configurations.
- Improved `terminal_tool.py` to manage execution environments and lifecycle, ensuring proper cleanup and error handling.
- Introduced `terminal_hecate.py` for executing commands on MorphCloud VMs, providing an alternative backend for terminal operations.

											
										
										
											2026-01-23 12:26:53 +00:00
+								            # No explicit api_key: read OPENROUTER_API_KEY from the environment (empty string if unset)
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								            client_kwargs["api_key"] = os.getenv("OPENROUTER_API_KEY", "")
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
-												add identifier for openrouter calls

											
										
										
											2026-02-25 16:34:47 -08:00
+								        # OpenRouter app attribution — shows hermes-agent in rankings/analytics
 								        effective_base = client_kwargs.get("base_url", "")
 								        if "openrouter" in effective_base.lower():
 								            client_kwargs["default_headers"] = {
 								                "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
 								                "X-OpenRouter-Title": "Hermes Agent",
-												fix(headers): update X-OpenRouter-Categories to include 'productivity'

											
										
										
											2026-02-28 10:38:49 -08:00
+								                "X-OpenRouter-Categories": "productivity,cli-agent",
-												add identifier for openrouter calls

											
										
										
											2026-02-25 16:34:47 -08:00
+								            }
-												fix: add Kimi Code API support (api.kimi.com/coding/v1)

Kimi Code (platform.kimi.ai) issues API keys prefixed sk-kimi- that require:
1. A different base URL: api.kimi.com/coding/v1 (not api.moonshot.ai/v1)
2. A User-Agent header identifying a recognized coding agent

Without this fix, sk-kimi- keys fail with 401 (wrong endpoint) or 403
('only available for Coding Agents') errors.

Changes:
- Auto-detect sk-kimi- key prefix and route to api.kimi.com/coding/v1
- Send User-Agent: KimiCLI/1.0 header for Kimi Code endpoints
- Legacy Moonshot keys (api.moonshot.ai) continue to work unchanged
- KIMI_BASE_URL env var override still takes priority over auto-detection
- Updated .env.example with correct docs and all endpoint options
- Fixed doctor.py health check for Kimi Code keys

Reference: https://github.com/MoonshotAI/kimi-cli (platforms.py)

											
										
										
											2026-03-07 20:43:34 -05:00
+								        elif "api.kimi.com" in effective_base.lower():
 								            # Kimi Code API requires a recognized coding-agent User-Agent
 								            # (see https://github.com/MoonshotAI/kimi-cli)
 								            client_kwargs["default_headers"] = {
 								                "User-Agent": "KimiCLI/1.0",
 								            }
-												add identifier for openrouter calls

											
										
										
											2026-02-25 16:34:47 -08:00
-												feat: enhance interrupt handling and container resource configuration

- Introduced a shared interrupt signaling mechanism to allow tools to check for user interrupts during long-running operations.
- Updated the AIAgent to handle interrupts more effectively, ensuring in-progress tool calls are canceled and multiple interrupt messages are combined into one prompt.
- Enhanced the CLI configuration to include container resource limits (CPU, memory, disk) and persistence options for Docker, Singularity, and Modal environments.
- Improved documentation to clarify interrupt behaviors and container resource settings, providing users with better guidance on configuration and usage.

											
										
										
											2026-02-23 02:11:33 -08:00
+								        self._client_kwargs = client_kwargs  # stored for rebuilding after interrupt
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        try:
 								            self.client = OpenAI(**client_kwargs)
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								            if not self.quiet_mode:
 								                print(f"🤖 AI Agent initialized with model: {self.model}")
 								                if base_url:
 								                    print(f"🔗 Using custom base URL: {base_url}")
 								                # Always show API key info (masked) for debugging auth issues
 								                key_used = client_kwargs.get("api_key", "none")
 								                if key_used and key_used != "dummy-key" and len(key_used) > 12:
 								                    print(f"🔑 Using API key: {key_used[:8]}...{key_used[-4:]}")
 								                else:
 								                    print(f"⚠️  Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')")
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        except Exception as e:
 								            raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								        # Provider fallback — a single backup model/provider tried when the
 								        # primary is exhausted (rate-limit, overload, connection failure).
 								        # Config shape: {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}
 								        self._fallback_model = fallback_model if isinstance(fallback_model, dict) else None
 								        self._fallback_activated = False
 								        if self._fallback_model:
 								            fb_p = self._fallback_model.get("provider", "")
 								            fb_m = self._fallback_model.get("model", "")
 								            if fb_p and fb_m and not self.quiet_mode:
 								                print(f"🔄 Fallback model: {fb_m} ({fb_p})")
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        # Get available tools with filtering
 								        self.tools = get_tool_definitions(
 								            enabled_toolsets=enabled_toolsets,
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								            disabled_toolsets=disabled_toolsets,
 								            quiet_mode=self.quiet_mode,
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        )
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								        # Show tool configuration and store valid tool names for validation
 								        self.valid_tool_names = set()
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        if self.tools:
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								            self.valid_tool_names = {tool["function"]["name"] for tool in self.tools}
 								            tool_names = sorted(self.valid_tool_names)
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								            if not self.quiet_mode:
 								                print(f"🛠️  Loaded {len(self.tools)} tools: {', '.join(tool_names)}")
 								                # Show filtering info if applied
 								                if enabled_toolsets:
 								                    print(f"   ✅ Enabled toolsets: {', '.join(enabled_toolsets)}")
 								                if disabled_toolsets:
 								                    print(f"   ❌ Disabled toolsets: {', '.join(disabled_toolsets)}")
 								        elif not self.quiet_mode:
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								            print("🛠️  No tools loaded (all tools filtered out or unavailable)")
 								        # Check tool requirements
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								        if self.tools and not self.quiet_mode:
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								            requirements = check_toolset_requirements()
 								            missing_reqs = [name for name, available in requirements.items() if not available]
 								            if missing_reqs:
 								                print(f"⚠️  Some tools may not work due to missing requirements: {missing_reqs}")
 								        # Show trajectory saving status
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								        if self.save_trajectories and not self.quiet_mode:
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								            print("📝 Trajectory saving enabled")
-												Add ephemeral system prompt support in batch and agent runners. Update README with usage examples and documentation for the new feature. Ensure prompt is not saved to trajectories.

											
										
										
											2025-10-08 02:33:58 +00:00
 								        # Show ephemeral system prompt status
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								        if self.ephemeral_system_prompt and not self.quiet_mode:
-												Add ephemeral system prompt support in batch and agent runners. Update README with usage examples and documentation for the new feature. Ensure prompt is not saved to trajectories.

											
										
										
											2025-10-08 02:33:58 +00:00
+								            prompt_preview = self.ephemeral_system_prompt[:60] + "..." if len(self.ephemeral_system_prompt) > 60 else self.ephemeral_system_prompt
 								            print(f"🔒 Ephemeral system prompt: '{prompt_preview}' (not saved to trajectories)")
-												Enhance session logging and interactive sudo support

- Implemented automatic session logging, saving conversation trajectories to the `logs/` directory in JSON format, with each session having a unique identifier.
- Updated the CLI to display the session ID in the welcome banner for easy reference.
- Introduced an interactive sudo password prompt in CLI mode, allowing users to enter their password with a 45-second timeout, enhancing user experience during command execution.
- Documented session logging and interactive sudo features in `README.md`, `cli.md`, and `cli-config.yaml.example` for better user guidance.

											
										
										
											2026-02-01 15:36:26 -08:00
-												Implement Anthropic prompt caching for Claude models via OpenRouter

- Introduced a caching strategy that reduces input token costs by ~75% on multi-turn conversations by caching the conversation prefix.
- Added functions to apply cache control markers to messages, enhancing efficiency in token usage.
- Updated AIAgent to auto-enable prompt caching for Claude models, with configurable cache TTL.
- Enhanced logging to track cache hit statistics when caching is active, improving monitoring of token usage.

											
										
										
											2026-02-10 06:49:41 +00:00
+								        # Show prompt caching status
 								        if self._use_prompt_caching and not self.quiet_mode:
 								            print(f"💾 Prompt caching: ENABLED (Claude via OpenRouter, {self._cache_ttl} TTL)")
-												Enhance session logging and interactive sudo support

- Implemented automatic session logging, saving conversation trajectories to the `logs/` directory in JSON format, with each session having a unique identifier.
- Updated the CLI to display the session ID in the welcome banner for easy reference.
- Introduced an interactive sudo password prompt in CLI mode, allowing users to enter their password with a 45-second timeout, enhancing user experience during command execution.
- Documented session logging and interactive sudo features in `README.md`, `cli.md`, and `cli-config.yaml.example` for better user guidance.

											
										
										
											2026-02-01 15:36:26 -08:00
+								        # Session logging setup - auto-save conversation trajectories for debugging
 								        self.session_start = datetime.now()
 								        if session_id:
 								            # Use provided session ID (e.g., from CLI)
 								            self.session_id = session_id
 								        else:
 								            # Generate a new session ID
 								            timestamp_str = self.session_start.strftime("%Y%m%d_%H%M%S")
 								            short_uuid = uuid.uuid4().hex[:6]
 								            self.session_id = f"{timestamp_str}_{short_uuid}"
-												fix: update session logging directory path in README and code

- Changed the session logging directory from `~/.hermes-agent/logs/` to `~/.hermes/sessions/` for consistency.
- Updated `run_agent.py` to use the new logging path, so session logs are stored alongside gateway sessions.

											
										
										
											2026-02-21 01:20:18 -08:00
+								        # Session logs go into ~/.hermes/sessions/ alongside gateway sessions
 								        hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
 								        self.logs_dir = hermes_home / "sessions"
 								        self.logs_dir.mkdir(parents=True, exist_ok=True)
-												Enhance session logging and interactive sudo support

- Implemented automatic session logging, saving conversation trajectories to the `logs/` directory in JSON format, with each session having a unique identifier.
- Updated the CLI to display the session ID in the welcome banner for easy reference.
- Introduced an interactive sudo password prompt in CLI mode, allowing users to enter their password with a 45-second timeout, enhancing user experience during command execution.
- Documented session logging and interactive sudo features in `README.md`, `cli.md`, and `cli-config.yaml.example` for better user guidance.

											
										
										
											2026-02-01 15:36:26 -08:00
+								        self.session_log_file = self.logs_dir / f"session_{self.session_id}.json"
 								        # Track conversation messages for session logging
 								        self._session_messages: List[Dict[str, Any]] = []
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        # Cached system prompt -- built once per session, only rebuilt on compression
 								        self._cached_system_prompt: Optional[str] = None
-												feat: filesystem checkpoints and /rollback command

Automatic filesystem snapshots before destructive file operations,
with user-facing rollback.  Inspired by PR #559 (by @alireza78a).

Architecture:
- Shadow git repos at ~/.hermes/checkpoints/{hash}/ via GIT_DIR
- CheckpointManager: take/list/restore, turn-scoped dedup, pruning
- Transparent — the LLM never sees it, no tool schema, no tokens
- Once per turn — only first write_file/patch triggers a snapshot

Integration:
- Config: checkpoints.enabled + checkpoints.max_snapshots
- CLI flag: hermes --checkpoints
- Trigger: run_agent.py _execute_tool_calls() before write_file/patch
- /rollback slash command in CLI + gateway (list, restore by number)
- Pre-rollback snapshot auto-created on restore (undo the undo)

Safety:
- Never blocks file operations — all errors silently logged
- Skips root dir, home dir, dirs >50K files
- Disables gracefully when git not installed
- Shadow repo completely isolated from project git

Tests: 35 new tests, all passing (2798 total suite)
Docs: feature page, config reference, CLI commands reference

											
										
										
											2026-03-10 00:49:15 -07:00
+								        # Filesystem checkpoint manager (transparent — not a tool)
 								        from tools.checkpoint_manager import CheckpointManager
 								        self._checkpoint_mgr = CheckpointManager(
 								            enabled=checkpoints_enabled,
 								            max_snapshots=checkpoint_max_snapshots,
 								        )
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        # SQLite session store (optional -- provided by CLI or gateway)
 								        self._session_db = session_db
-												fix: eliminate 3x SQLite message duplication in gateway sessions (#860)

Three separate code paths all wrote to the same SQLite state.db with
no deduplication, inflating session transcripts by 3-4x:

1. _log_msg_to_db() — wrote each message individually after append
2. _flush_messages_to_session_db() — re-wrote ALL new messages at
   every _persist_session() call (~18 exit points), with no tracking
   of what was already written
3. gateway append_to_transcript() — wrote everything a third time
   after the agent returned

Since load_transcript() prefers SQLite over JSONL, the inflated data
was loaded on every session resume, causing proportional token waste.

Fix:
- Remove _log_msg_to_db() and all 16 call sites (redundant with flush)
- Add _last_flushed_db_idx tracking in _flush_messages_to_session_db()
  so repeated _persist_session() calls only write truly new messages
- Reset flush cursor on compression (new session ID)
- Add skip_db parameter to SessionStore.append_to_transcript() so the
  gateway skips SQLite writes when the agent already persisted them
- Gateway now passes skip_db=True for agent-managed messages, still
  writes to JSONL as backup

Verified: a 12-message CLI session with tool calls produces exactly
12 SQLite rows with zero duplicates (previously would be 36-48).

Tests: 9 new tests covering flush deduplication, skip_db behavior,
compression reset, and initialization. Full suite passes (2869 tests).

											
										
										
											2026-03-10 15:22:44 -07:00
+								        self._last_flushed_db_idx = 0  # tracks DB-write cursor to prevent duplicate writes
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        if self._session_db:
 								            try:
 								                self._session_db.create_session(
 								                    session_id=self.session_id,
 								                    source=self.platform or "cli",
 								                    model=self.model,
 								                    model_config={
 								                        "max_iterations": self.max_iterations,
 								                        "reasoning_config": reasoning_config,
 								                        "max_tokens": max_tokens,
 								                    },
 								                    user_id=None,
 								                )
-												refactor: enhance error handling with structured logging across multiple modules

- Updated various modules including cli.py, run_agent.py, gateway, and tools to replace silent exception handling with structured logging.
- Improved error messages to provide more context, aiding in debugging and monitoring.
- Ensured consistent logging practices throughout the codebase, enhancing traceability and maintainability.

											
										
										
											2026-02-21 03:32:11 -08:00
+								            except Exception as e:
 								                logger.debug("Session DB create_session failed: %s", e)
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
-												Add todo tool for agent task planning and management

Single `todo` tool that reads (no params) or writes (provide todos array
with merge flag). In-memory TodoStore on AIAgent, no system prompt
mutation, behavioral guidance in tool description only. State re-injected
after context compression events. Gateway sessions hydrate from
conversation history. Added to all platform toolsets.

Also wired into RL agent_loop.py with per-run TodoStore and fixed
browser_snapshot user_task passthrough from first user message.

											
										
										
											2026-02-17 17:02:33 -08:00
+								        # In-memory todo list for task planning (one per agent/session)
 								        from tools.todo_tool import TodoStore
 								        self._todo_store = TodoStore()
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        # Persistent memory (MEMORY.md + USER.md) -- loaded from disk
 								        self._memory_store = None
 								        self._memory_enabled = False
 								        self._user_profile_enabled = False
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
+								        self._memory_nudge_interval = 10
 								        self._memory_flush_min_turns = 6
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        if not skip_memory:
 								            try:
 								                from hermes_cli.config import load_config as _load_mem_config
 								                mem_config = _load_mem_config().get("memory", {})
 								                self._memory_enabled = mem_config.get("memory_enabled", False)
 								                self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
+								                self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10))
 								                self._memory_flush_min_turns = int(mem_config.get("flush_min_turns", 6))
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								                if self._memory_enabled or self._user_profile_enabled:
 								                    from tools.memory_tool import MemoryStore
 								                    self._memory_store = MemoryStore(
 								                        memory_char_limit=mem_config.get("memory_char_limit", 2200),
 								                        user_char_limit=mem_config.get("user_char_limit", 1375),
 								                    )
 								                    self._memory_store.load_from_disk()
 								            except Exception:
 								                pass  # Memory is optional -- don't break agent init
-												feat: add Honcho AI-native memory integration

Opt-in persistent cross-session user modeling via Honcho. Reads
~/.honcho/config.json as the single source of truth (shared with
Claude Code, Cursor, and other Honcho-enabled tools). Zero impact
when disabled or unconfigured.

- honcho_integration/ package (client, session manager, peer resolution)
- Host-based config resolution matching claude-honcho/cursor-honcho pattern
- Prefetch user context into system prompt per conversation turn
- Sync user/assistant messages to Honcho after each exchange
- query_user_context tool for mid-conversation dialectic reasoning
- Gated activation: requires ~/.honcho/config.json with enabled=true

											
										
										
											2026-02-25 19:34:25 -05:00
+								        # Honcho AI-native memory (cross-session user modeling)
 								        # Reads ~/.honcho/config.json as the single source of truth.
 								        self._honcho = None  # HonchoSessionManager | None
 								        self._honcho_session_key = honcho_session_key
 								        if not skip_memory:
 								            try:
 								                from honcho_integration.client import HonchoClientConfig, get_honcho_client
 								                hcfg = HonchoClientConfig.from_global_config()
 								                if hcfg.enabled and hcfg.api_key:
 								                    from honcho_integration.session import HonchoSessionManager
 								                    client = get_honcho_client(hcfg)
 								                    self._honcho = HonchoSessionManager(
 								                        honcho=client,
 								                        config=hcfg,
-												feat: integrate Honcho with USER.md memory system

When Honcho is active:
- System prompt uses Honcho prefetch instead of USER.md
- memory tool target=user add routes to Honcho
- MEMORY.md untouched in all cases

When disabled, everything works as before.

Also wires up contextTokens config to cap prefetch size.

											
										
										
											2026-02-26 18:02:24 -05:00
+								                        context_tokens=hcfg.context_tokens,
-												feat: add Honcho AI-native memory integration

Opt-in persistent cross-session user modeling via Honcho. Reads
~/.honcho/config.json as the single source of truth (shared with
Claude Code, Cursor, and other Honcho-enabled tools). Zero impact
when disabled or unconfigured.

- honcho_integration/ package (client, session manager, peer resolution)
- Host-based config resolution matching claude-honcho/cursor-honcho pattern
- Prefetch user context into system prompt per conversation turn
- Sync user/assistant messages to Honcho after each exchange
- query_user_context tool for mid-conversation dialectic reasoning
- Gated activation: requires ~/.honcho/config.json with enabled=true

											
										
										
											2026-02-25 19:34:25 -05:00
+								                    )
 								                    # Resolve session key: explicit arg > global sessions map > fallback
 								                    if not self._honcho_session_key:
 								                        self._honcho_session_key = (
 								                            hcfg.resolve_session_name()
 								                            or "hermes-default"
 								                        )
 								                    # Ensure session exists in Honcho
 								                    self._honcho.get_or_create(self._honcho_session_key)
 								                    # Inject session context into the honcho tool module
 								                    from tools.honcho_tools import set_session_context
 								                    set_session_context(self._honcho, self._honcho_session_key)
 								                    logger.info(
 								                        "Honcho active (session: %s, user: %s, workspace: %s)",
 								                        self._honcho_session_key, hcfg.peer_name, hcfg.workspace_id,
 								                    )
 								                else:
 								                    if not hcfg.enabled:
 								                        logger.debug("Honcho disabled in global config")
 								                    elif not hcfg.api_key:
 								                        logger.debug("Honcho enabled but no API key configured")
 								            except Exception as e:
 								                logger.debug("Honcho init failed (non-fatal): %s", e)
 								                self._honcho = None
-												feat: introduce skills management features in AIAgent and CLI

- Added skills configuration options in cli-config.yaml.example, including a nudge interval for skill creation reminders.
- Implemented skills guidance in AIAgent to prompt users to save reusable workflows after complex tasks.
- Enhanced skills indexing in the prompt builder to include descriptions from SKILL.md files for better context.
- Updated the agent's behavior to periodically remind users about potential skills during tool-calling iterations.

											
										
										
											2026-02-22 13:28:13 -08:00
+								        # Skills config: nudge interval for skill creation reminders
 								        self._skill_nudge_interval = 15
 								        try:
 								            from hermes_cli.config import load_config as _load_skills_config
 								            skills_config = _load_skills_config().get("skills", {})
 								            self._skill_nudge_interval = int(skills_config.get("creation_nudge_interval", 15))
 								        except Exception:
 								            pass
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								        # Initialize context compressor for automatic context management
 								        # Compresses conversation when approaching model's context limit
-												refactor: update context compression configuration to use config.yaml and improve model handling

											
										
										
											2026-02-28 04:46:35 -08:00
+								        # Configuration via config.yaml (compression section) or environment variables
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								        compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85"))
 								        compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes")
-												refactor: update context compression configuration to use config.yaml and improve model handling

											
										
										
											2026-02-28 04:46:35 -08:00
+								        compression_summary_model = os.getenv("CONTEXT_COMPRESSION_MODEL") or None
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
 								        self.context_compressor = ContextCompressor(
 								            model=self.model,
 								            threshold_percent=compression_threshold,
-												refactor: remove unused compression model variable in AIAgent

- Eliminated the `compression_model` variable from the AIAgent class, as it was not being utilized.
- Cleaned up the context compressor initialization for improved clarity and maintainability.

											
										
										
											2026-02-22 02:17:33 -08:00
+								            protect_first_n=3,
 								            protect_last_n=4,
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								            summary_target_tokens=500,
-												refactor: update context compression configuration to use config.yaml and improve model handling

											
										
										
											2026-02-28 04:46:35 -08:00
+								            summary_model_override=compression_summary_model,
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								            quiet_mode=self.quiet_mode,
-												feat: smart context length probing with persistent caching + banner display

Replaces the unsafe 128K fallback for unknown models with a descending
probe strategy (2M → 1M → 512K → 200K → 128K → 64K → 32K). When a
context-length error occurs, the agent steps down tiers and retries.
The discovered limit is cached per model+provider combo in
~/.hermes/context_length_cache.yaml so subsequent sessions skip probing.

Also parses API error messages to extract the actual context limit
(e.g. 'maximum context length is 32768 tokens') for instant resolution.

The CLI banner now displays the context window size next to the model
name (e.g. 'claude-opus-4 · 200K context · Nous Research').

Changes:
- agent/model_metadata.py: CONTEXT_PROBE_TIERS, persistent cache
  (save/load/get), parse_context_limit_from_error(), get_next_probe_tier()
- agent/context_compressor.py: accepts base_url, passes to metadata
- run_agent.py: step-down logic in context error handler, caches on success
- cli.py + hermes_cli/banner.py: context length in welcome banner
- tests: 22 new tests for probing, parsing, and caching

Addresses #132. PR #319's approach (8K default) rejected — too conservative.

											
										
										
											2026-03-05 16:09:57 -08:00
+								            base_url=self.base_url,
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								        )
 								        self.compression_enabled = compression_enabled
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
+								        self._user_turn_count = 0
-												feat(cli): add /usage command to display session token usage

Introduced a new command "/usage" in the CLI to show cumulative token usage for the current session. This includes details on prompt tokens, completion tokens, total tokens, API calls, and context state. Updated command documentation to reflect this addition. Enhanced the AIAgent class to track token usage throughout the session.

											
										
										
											2026-03-01 00:23:19 -08:00
 								        # Cumulative token usage for the session
 								        self.session_prompt_tokens = 0
 								        self.session_completion_tokens = 0
 								        self.session_total_tokens = 0
 								        self.session_api_calls = 0
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
 								        if not self.quiet_mode:
 								            if compression_enabled:
 								                print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (compress at {int(compression_threshold*100)}% = {self.context_compressor.threshold_tokens:,})")
 								            else:
 								                print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)")
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
-												feat(api): implement dynamic max tokens handling for various providers

- Added _max_tokens_param method in AIAgent to return appropriate max tokens parameter based on the provider (OpenAI vs. others).
- Updated API calls in AIAgent to utilize the new max tokens handling.
- Introduced auxiliary_max_tokens_param function in auxiliary_client for consistent max tokens management across auxiliary clients.
- Refactored multiple tools to use auxiliary_max_tokens_param for improved compatibility with different models and providers.

											
										
										
											2026-02-26 20:23:56 -08:00
+								    def _max_tokens_param(self, value: int) -> dict:
 								        """Return the correct max tokens kwarg for the current provider.
 								        OpenAI's newer models (gpt-4o, o-series, gpt-5+) require
 								        'max_completion_tokens'. OpenRouter, local models, and older
 								        OpenAI models use 'max_tokens'.
 								        """
 								        _is_direct_openai = (
 								            "api.openai.com" in self.base_url.lower()
 								            and "openrouter" not in self.base_url.lower()
 								        )
 								        if _is_direct_openai:
 								            return {"max_completion_tokens": value}
 								        return {"max_tokens": value}
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								    def _has_content_after_think_block(self, content: str) -> bool:
 								        """
 								        Check if content has actual text after any <think></think> blocks.
 								        This detects cases where the model only outputs reasoning but no actual
 								        response, which indicates an incomplete generation that should be retried.
 								        Args:
 								            content: The assistant message content to check
 								        Returns:
 								            True if there's meaningful content after think blocks, False otherwise
 								        """
 								        if not content:
 								            return False
 								        # Remove all <think>...</think> blocks (including nested ones, non-greedy)
 								        cleaned = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
 								        # Check if there's any non-whitespace content remaining
 								        return bool(cleaned.strip())
-												feat: enhance AIAgent's tool usage nudges and content handling

- Introduced a method to strip <think> blocks from content, improving text visibility.
- Implemented counters to reset nudge intervals when memory and skill tools are used, enhancing user guidance.
- Captured content from turns with tool calls to provide fallback responses, ensuring continuity in conversation.
- Updated nudge logic to remind users about saving memories and creating skills based on interaction patterns.

											
										
										
											2026-02-22 21:33:28 -08:00
+								    def _strip_think_blocks(self, content: str) -> str:
 								        """Remove <think>...</think> blocks from content, returning only visible text."""
 								        if not content:
 								            return ""
 								        return re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
 								    def _looks_like_codex_intermediate_ack(
 								        self,
 								        user_message: str,
 								        assistant_content: str,
 								        messages: List[Dict[str, Any]],
 								    ) -> bool:
 								        """Detect a planning/ack message that should continue instead of ending the turn."""
 								        if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages):
 								            return False
 								        assistant_text = self._strip_think_blocks(assistant_content or "").strip().lower()
 								        if not assistant_text:
 								            return False
 								        if len(assistant_text) > 1200:
 								            return False
 								        has_future_ack = bool(
 								            re.search(r"\b(i['’]ll|i will|let me|i can do that|i can help with that)\b", assistant_text)
 								        )
 								        if not has_future_ack:
 								            return False
 								        action_markers = (
 								            "look into",
-												Harden Codex stream handling and ack continuation

											
										
										
											2026-02-25 18:56:06 -08:00
+								            "look at",
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								            "inspect",
 								            "scan",
 								            "check",
 								            "analyz",
 								            "review",
 								            "explore",
 								            "read",
 								            "open",
 								            "run",
 								            "test",
 								            "fix",
 								            "debug",
 								            "search",
 								            "find",
 								            "walkthrough",
 								            "report back",
 								            "summarize",
 								        )
-												Harden Codex stream handling and ack continuation

											
										
										
											2026-02-25 18:56:06 -08:00
+								        workspace_markers = (
 								            "directory",
 								            "current directory",
 								            "current dir",
 								            "cwd",
 								            "repo",
 								            "repository",
 								            "codebase",
 								            "project",
 								            "folder",
 								            "filesystem",
 								            "file tree",
 								            "files",
 								            "path",
 								        )
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
 								        user_text = (user_message or "").strip().lower()
-												Harden Codex stream handling and ack continuation

											
										
										
											2026-02-25 18:56:06 -08:00
+								        user_targets_workspace = (
 								            any(marker in user_text for marker in workspace_markers)
 								            or "~/" in user_text
 								            or "/" in user_text
 								        )
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								        assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
-												Harden Codex stream handling and ack continuation

											
										
										
											2026-02-25 18:56:06 -08:00
+								        assistant_targets_workspace = any(
 								            marker in assistant_text for marker in workspace_markers
 								        )
 								        return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action
-												feat: enhance AIAgent's tool usage nudges and content handling

- Introduced a method to strip <think> blocks from content, improving text visibility.
- Implemented counters to reset nudge intervals when memory and skill tools are used, enhancing user guidance.
- Captured content from turns with tool calls to provide fallback responses, ensuring continuity in conversation.
- Updated nudge logic to remind users about saving memories and creating skills based on interaction patterns.

											
										
										
											2026-02-22 21:33:28 -08:00
-												Implement reasoning extraction and enhance assistant message handling

- Added a new method `_extract_reasoning` to extract reasoning content from assistant messages, accommodating multiple formats from various providers.
- Updated message handling to ensure all assistant messages include reasoning content for API compatibility, preserving multi-turn reasoning context.
- Enhanced logging to capture reasoning details for debugging and analysis.
- Modified the TODO.md to reflect changes in planning and task management, emphasizing the need for structured task decomposition and progress tracking.

											
										
										
											2026-02-01 22:48:18 -08:00
+								    def _extract_reasoning(self, assistant_message) -> Optional[str]:
 								        """
 								        Extract reasoning/thinking content from an assistant message.
 								        OpenRouter and various providers can return reasoning in multiple formats:
 . message.reasoning - Direct reasoning field (DeepSeek, Qwen, etc.)
 . message.reasoning_content - Alternative field (Moonshot AI, Novita, etc.)
 . message.reasoning_details - Array of {type, summary, ...} objects (OpenRouter unified)
 								        Args:
 								            assistant_message: The assistant message object from the API response
 								        Returns:
 								            Combined reasoning text, or None if no reasoning found
 								        """
 								        reasoning_parts = []
 								        # Check direct reasoning field
 								        if hasattr(assistant_message, 'reasoning') and assistant_message.reasoning:
 								            reasoning_parts.append(assistant_message.reasoning)
 								        # Check reasoning_content field (alternative name used by some providers)
 								        if hasattr(assistant_message, 'reasoning_content') and assistant_message.reasoning_content:
 								            # Don't duplicate if same as reasoning
 								            if assistant_message.reasoning_content not in reasoning_parts:
 								                reasoning_parts.append(assistant_message.reasoning_content)
 								        # Check reasoning_details array (OpenRouter unified format)
 								        # Format: [{"type": "reasoning.summary", "summary": "...", ...}, ...]
 								        if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
 								            for detail in assistant_message.reasoning_details:
 								                if isinstance(detail, dict):
 								                    # Extract summary from reasoning detail object
 								                    summary = detail.get('summary') or detail.get('content') or detail.get('text')
 								                    if summary and summary not in reasoning_parts:
 								                        reasoning_parts.append(summary)
 								        # Combine all reasoning parts
 								        if reasoning_parts:
 								            return "\n\n".join(reasoning_parts)
 								        return None
-												Cleanup time!

											
										
										
											2026-02-20 23:23:32 -08:00
+								    def _cleanup_task_resources(self, task_id: str) -> None:
 								        """Clean up VM and browser resources for a given task."""
 								        try:
 								            cleanup_vm(task_id)
 								        except Exception as e:
 								            if self.verbose_logging:
 								                logging.warning(f"Failed to cleanup VM for task {task_id}: {e}")
 								        try:
 								            cleanup_browser(task_id)
 								        except Exception as e:
 								            if self.verbose_logging:
 								                logging.warning(f"Failed to cleanup browser for task {task_id}: {e}")
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								    def _persist_session(self, messages: List[Dict], conversation_history: List[Dict] = None):
 								        """Save session state to both JSON log and SQLite on any exit path.
 								        Ensures conversations are never lost, even on errors or early returns.
 								        """
 								        self._session_messages = messages
 								        self._save_session_log(messages)
 								        self._flush_messages_to_session_db(messages, conversation_history)
-												feat: update database schema and enhance message persistence

- Incremented schema version to 2 and added a new column `finish_reason` to the `messages` table.
- Implemented a method to flush un-logged messages to the session database, ensuring data integrity during conversation interruptions.
- Enhanced error handling to persist messages in various early-return scenarios, preventing data loss.

											
										
										
											2026-02-21 00:05:39 -08:00
+								    def _flush_messages_to_session_db(self, messages: List[Dict], conversation_history: List[Dict] = None):
-												fix: eliminate 3x SQLite message duplication in gateway sessions (#860)

Three separate code paths all wrote to the same SQLite state.db with
no deduplication, inflating session transcripts by 3-4x:

1. _log_msg_to_db() — wrote each message individually after append
2. _flush_messages_to_session_db() — re-wrote ALL new messages at
   every _persist_session() call (~18 exit points), with no tracking
   of what was already written
3. gateway append_to_transcript() — wrote everything a third time
   after the agent returned

Since load_transcript() prefers SQLite over JSONL, the inflated data
was loaded on every session resume, causing proportional token waste.

Fix:
- Remove _log_msg_to_db() and all 16 call sites (redundant with flush)
- Add _last_flushed_db_idx tracking in _flush_messages_to_session_db()
  so repeated _persist_session() calls only write truly new messages
- Reset flush cursor on compression (new session ID)
- Add skip_db parameter to SessionStore.append_to_transcript() so the
  gateway skips SQLite writes when the agent already persisted them
- Gateway now passes skip_db=True for agent-managed messages, still
  writes to JSONL as backup

Verified: a 12-message CLI session with tool calls produces exactly
12 SQLite rows with zero duplicates (previously would be 36-48).

Tests: 9 new tests covering flush deduplication, skip_db behavior,
compression reset, and initialization. Full suite passes (2869 tests).

											
										
										
											2026-03-10 15:22:44 -07:00
+								        """Persist any un-flushed messages to the SQLite session store.
-												feat: update database schema and enhance message persistence

- Incremented schema version to 2 and added a new column `finish_reason` to the `messages` table.
- Implemented a method to flush un-logged messages to the session database, ensuring data integrity during conversation interruptions.
- Enhanced error handling to persist messages in various early-return scenarios, preventing data loss.

											
										
										
											2026-02-21 00:05:39 -08:00
-												fix: eliminate 3x SQLite message duplication in gateway sessions (#860)

Three separate code paths all wrote to the same SQLite state.db with
no deduplication, inflating session transcripts by 3-4x:

1. _log_msg_to_db() — wrote each message individually after append
2. _flush_messages_to_session_db() — re-wrote ALL new messages at
   every _persist_session() call (~18 exit points), with no tracking
   of what was already written
3. gateway append_to_transcript() — wrote everything a third time
   after the agent returned

Since load_transcript() prefers SQLite over JSONL, the inflated data
was loaded on every session resume, causing proportional token waste.

Fix:
- Remove _log_msg_to_db() and all 16 call sites (redundant with flush)
- Add _last_flushed_db_idx tracking in _flush_messages_to_session_db()
  so repeated _persist_session() calls only write truly new messages
- Reset flush cursor on compression (new session ID)
- Add skip_db parameter to SessionStore.append_to_transcript() so the
  gateway skips SQLite writes when the agent already persisted them
- Gateway now passes skip_db=True for agent-managed messages, still
  writes to JSONL as backup

Verified: a 12-message CLI session with tool calls produces exactly
12 SQLite rows with zero duplicates (previously would be 36-48).

Tests: 9 new tests covering flush deduplication, skip_db behavior,
compression reset, and initialization. Full suite passes (2869 tests).

											
										
										
											2026-03-10 15:22:44 -07:00
+								        Uses _last_flushed_db_idx to track which messages have already been
 								        written, so repeated calls (from multiple exit paths) only write
 								        truly new messages — preventing the duplicate-write bug (#860).
-												feat: update database schema and enhance message persistence

- Incremented schema version to 2 and added a new column `finish_reason` to the `messages` table.
- Implemented a method to flush un-logged messages to the session database, ensuring data integrity during conversation interruptions.
- Enhanced error handling to persist messages in various early-return scenarios, preventing data loss.

											
										
										
											2026-02-21 00:05:39 -08:00
+								        """
 								        if not self._session_db:
 								            return
 								        try:
-												Cherry-pick 6 bug fixes from PR #76 and update documentation

Code fixes (run_agent.py):
- Fix off-by-one in _flush_messages_to_session_db skipping one message per flush
- Add clear_interrupt() to 3 early-return paths preventing stale interrupt state
- Wrap handle_function_call in try/except so tool crashes don't kill the conversation
- Replace fragile `is` identity check with _flush_sentinel marker for memory flush cleanup
- Fix retry loop off-by-one (6 attempts not 7)
- Remove redundant inline `import re`

											
										
										
											2026-02-27 03:21:49 -08:00
+								            start_idx = len(conversation_history) if conversation_history else 0
-												fix: eliminate 3x SQLite message duplication in gateway sessions (#860)

Three separate code paths all wrote to the same SQLite state.db with
no deduplication, inflating session transcripts by 3-4x:

1. _log_msg_to_db() — wrote each message individually after append
2. _flush_messages_to_session_db() — re-wrote ALL new messages at
   every _persist_session() call (~18 exit points), with no tracking
   of what was already written
3. gateway append_to_transcript() — wrote everything a third time
   after the agent returned

Since load_transcript() prefers SQLite over JSONL, the inflated data
was loaded on every session resume, causing proportional token waste.

Fix:
- Remove _log_msg_to_db() and all 16 call sites (redundant with flush)
- Add _last_flushed_db_idx tracking in _flush_messages_to_session_db()
  so repeated _persist_session() calls only write truly new messages
- Reset flush cursor on compression (new session ID)
- Add skip_db parameter to SessionStore.append_to_transcript() so the
  gateway skips SQLite writes when the agent already persisted them
- Gateway now passes skip_db=True for agent-managed messages, still
  writes to JSONL as backup

Verified: a 12-message CLI session with tool calls produces exactly
12 SQLite rows with zero duplicates (previously would be 36-48).

Tests: 9 new tests covering flush deduplication, skip_db behavior,
compression reset, and initialization. Full suite passes (2869 tests).

											
										
										
											2026-03-10 15:22:44 -07:00
+								            flush_from = max(start_idx, self._last_flushed_db_idx)
 								            for msg in messages[flush_from:]:
-												feat: update database schema and enhance message persistence

- Incremented schema version to 2 and added a new column `finish_reason` to the `messages` table.
- Implemented a method to flush un-logged messages to the session database, ensuring data integrity during conversation interruptions.
- Enhanced error handling to persist messages in various early-return scenarios, preventing data loss.

											
										
										
											2026-02-21 00:05:39 -08:00
+								                role = msg.get("role", "unknown")
 								                content = msg.get("content")
 								                tool_calls_data = None
 								                if hasattr(msg, "tool_calls") and msg.tool_calls:
 								                    tool_calls_data = [
 								                        {"name": tc.function.name, "arguments": tc.function.arguments}
 								                        for tc in msg.tool_calls
 								                    ]
 								                elif isinstance(msg.get("tool_calls"), list):
 								                    tool_calls_data = msg["tool_calls"]
 								                self._session_db.append_message(
 								                    session_id=self.session_id,
 								                    role=role,
 								                    content=content,
 								                    tool_name=msg.get("tool_name"),
 								                    tool_calls=tool_calls_data,
 								                    tool_call_id=msg.get("tool_call_id"),
 								                    finish_reason=msg.get("finish_reason"),
 								                )
-												fix: eliminate 3x SQLite message duplication in gateway sessions (#860)

Three separate code paths all wrote to the same SQLite state.db with
no deduplication, inflating session transcripts by 3-4x:

1. _log_msg_to_db() — wrote each message individually after append
2. _flush_messages_to_session_db() — re-wrote ALL new messages at
   every _persist_session() call (~18 exit points), with no tracking
   of what was already written
3. gateway append_to_transcript() — wrote everything a third time
   after the agent returned

Since load_transcript() prefers SQLite over JSONL, the inflated data
was loaded on every session resume, causing proportional token waste.

Fix:
- Remove _log_msg_to_db() and all 16 call sites (redundant with flush)
- Add _last_flushed_db_idx tracking in _flush_messages_to_session_db()
  so repeated _persist_session() calls only write truly new messages
- Reset flush cursor on compression (new session ID)
- Add skip_db parameter to SessionStore.append_to_transcript() so the
  gateway skips SQLite writes when the agent already persisted them
- Gateway now passes skip_db=True for agent-managed messages, still
  writes to JSONL as backup

Verified: a 12-message CLI session with tool calls produces exactly
12 SQLite rows with zero duplicates (previously would be 36-48).

Tests: 9 new tests covering flush deduplication, skip_db behavior,
compression reset, and initialization. Full suite passes (2869 tests).

											
										
										
											2026-03-10 15:22:44 -07:00
+								            self._last_flushed_db_idx = len(messages)
-												refactor: enhance error handling with structured logging across multiple modules

- Updated various modules including cli.py, run_agent.py, gateway, and tools to replace silent exception handling with structured logging.
- Improved error messages to provide more context, aiding in debugging and monitoring.
- Ensured consistent logging practices throughout the codebase, enhancing traceability and maintainability.

											
										
										
											2026-02-21 03:32:11 -08:00
+								        except Exception as e:
 								            logger.debug("Session DB append_message failed: %s", e)
-												feat: update database schema and enhance message persistence

- Incremented schema version to 2 and added a new column `finish_reason` to the `messages` table.
- Implemented a method to flush un-logged messages to the session database, ensuring data integrity during conversation interruptions.
- Enhanced error handling to persist messages in various early-return scenarios, preventing data loss.

											
										
										
											2026-02-21 00:05:39 -08:00
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								    def _get_messages_up_to_last_assistant(self, messages: List[Dict]) -> List[Dict]:
 								        """
 								        Get messages up to (but not including) the last assistant turn.
 								        This is used when we need to "roll back" to the last successful point
 								        in the conversation, typically when the final assistant message is
 								        incomplete or malformed.
 								        Args:
 								            messages: Full message list
 								        Returns:
 								            Messages up to the last complete assistant turn (ending with user/tool message)
 								        """
 								        if not messages:
 								            return []
 								        # Find the index of the last assistant message
 								        last_assistant_idx = None
 								        for i in range(len(messages) - 1, -1, -1):
 								            if messages[i].get("role") == "assistant":
 								                last_assistant_idx = i
 								                break
 								        if last_assistant_idx is None:
 								            # No assistant message found, return all messages
 								            return messages.copy()
 								        # Return everything up to (not including) the last assistant message
 								        return messages[:last_assistant_idx]
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								    def _format_tools_for_system_message(self) -> str:
 								        """
 								        Format tool definitions for the system message in the trajectory format.
 								        Returns:
 								            str: JSON string representation of tool definitions
 								        """
 								        if not self.tools:
 								            return "[]"
 								        # Convert tool definitions to the format expected in trajectories
 								        formatted_tools = []
 								        for tool in self.tools:
 								            func = tool["function"]
 								            formatted_tool = {
 								                "name": func["name"],
 								                "description": func.get("description", ""),
 								                "parameters": func.get("parameters", {}),
 								                "required": None  # Match the format in the example
 								            }
 								            formatted_tools.append(formatted_tool)
-												some cleanups

											
										
										
											2025-11-05 03:47:17 +00:00
+								        return json.dumps(formatted_tools, ensure_ascii=False)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
 								    def _convert_to_trajectory_format(self, messages: List[Dict[str, Any]], user_query: str, completed: bool) -> List[Dict[str, Any]]:
 								        """
 								        Convert internal message format to trajectory format for saving.
 								        Args:
 								            messages (List[Dict]): Internal message history
 								            user_query (str): Original user query
 								            completed (bool): Whether the conversation completed successfully
 								        Returns:
 								            List[Dict]: Messages in trajectory format
 								        """
 								        trajectory = []
 								        # Add system message with tool definitions
 								        system_msg = (
 								            "You are a function calling AI model. You are provided with function signatures within <tools> </tools> XML tags. "
 								            "You may call one or more functions to assist with the user query. If available tools are not relevant in assisting "
 								            "with user query, just respond in natural conversational language. Don't make assumptions about what values to plug "
 								            "into functions. After calling & executing the functions, you will be provided with function results within "
 								            "<tool_response> </tool_response> XML tags. Here are the available tools:\n"
 								            f"<tools>\n{self._format_tools_for_system_message()}\n</tools>\n"
 								            "For each function call return a JSON object, with the following pydantic model json schema for each:\n"
 								            "{'title': 'FunctionCall', 'type': 'object', 'properties': {'name': {'title': 'Name', 'type': 'string'}, "
 								            "'arguments': {'title': 'Arguments', 'type': 'object'}}, 'required': ['name', 'arguments']}\n"
 								            "Each function call should be enclosed within <tool_call> </tool_call> XML tags.\n"
 								            "Example:\n<tool_call>\n{'name': <function-name>,'arguments': <args-dict>}\n</tool_call>"
 								        )
 								        trajectory.append({
 								            "from": "system",
 								            "value": system_msg
 								        })
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								        # Add the actual user prompt (from the dataset) as the first human message
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        trajectory.append({
 								            "from": "human",
 								            "value": user_query
 								        })
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								        # Skip the first message (the user query) since we already added it above.
 								        # Prefill messages are injected at API-call time only (not in the messages
 								        # list), so no offset adjustment is needed here.
 								        i = 1
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        while i < len(messages):
 								            msg = messages[i]
 								            if msg["role"] == "assistant":
 								                # Check if this message has tool calls
 								                if "tool_calls" in msg and msg["tool_calls"]:
 								                    # Format assistant message with tool calls
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                    # Add <think> tags around reasoning for trajectory storage
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								                    content = ""
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								                    # Prepend reasoning in <think> tags if available (native thinking tokens)
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
+								                    if msg.get("reasoning") and msg["reasoning"].strip():
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                        content = f"<think>\n{msg['reasoning']}\n</think>\n"
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								                    if msg.get("content") and msg["content"].strip():
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								                        # Convert any <REASONING_SCRATCHPAD> tags to <think> tags
 								                        # (used when native thinking is disabled and model reasons via XML)
-												refactor: streamline scratchpad handling in AIAgent

- Removed static methods for converting and checking <REASONING_SCRATCHPAD> tags, simplifying the codebase.
- Replaced calls to the removed methods with direct function calls for better clarity and maintainability.
- Updated trajectory saving logic to utilize a dedicated function for improved organization and readability.

											
										
										
											2026-02-23 09:55:09 -08:00
+								                        content += convert_scratchpad_to_think(msg["content"]) + "\n"
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
 								                    # Add tool calls wrapped in XML tags
 								                    for tool_call in msg["tool_calls"]:
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								                        # Parse arguments - should always succeed since we validate during conversation
 								                        # but keep try-except as safety net
 								                        try:
 								                            arguments = json.loads(tool_call["function"]["arguments"]) if isinstance(tool_call["function"]["arguments"], str) else tool_call["function"]["arguments"]
 								                        except json.JSONDecodeError:
 								                            # This shouldn't happen since we validate and retry during conversation,
 								                            # but if it does, log warning and use empty dict
 								                            logging.warning(f"Unexpected invalid JSON in trajectory conversion: {tool_call['function']['arguments'][:100]}")
 								                            arguments = {}
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								                        tool_call_json = {
 								                            "name": tool_call["function"]["name"],
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								                            "arguments": arguments
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								                        }
-												some cleanups

											
										
										
											2025-11-05 03:47:17 +00:00
+								                        content += f"<tool_call>\n{json.dumps(tool_call_json, ensure_ascii=False)}\n</tool_call>\n"
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
-												Refactor BatchRunner and AIAgent for enhanced reasoning and tool management, improved tool definitions for fileops

- Updated `ALL_POSSIBLE_TOOLS` to auto-derive from `TOOL_TO_TOOLSET_MAP` for consistent schema.
- Introduced `_extract_reasoning_stats` function to track reasoning coverage in assistant turns.
- Enhanced `_process_batch_worker` to discard prompts with no reasoning and aggregate reasoning statistics.
- Updated documentation and comments for clarity on new features and changes.

											
										
										
											2026-02-08 20:19:14 +00:00
+								                    # Ensure every gpt turn has a <think> block (empty if no reasoning)
 								                    # so the format is consistent for training data
 								                    if "<think>" not in content:
 								                        content = "<think>\n</think>\n" + content
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								                    trajectory.append({
 								                        "from": "gpt",
 								                        "value": content.rstrip()
 								                    })
 								                    # Collect all subsequent tool responses
 								                    tool_responses = []
 								                    j = i + 1
 								                    while j < len(messages) and messages[j]["role"] == "tool":
 								                        tool_msg = messages[j]
 								                        # Format tool response with XML tags
 								                        tool_response = f"<tool_response>\n"
 								                        # Try to parse tool content as JSON if it looks like JSON
 								                        tool_content = tool_msg["content"]
 								                        try:
 								                            if tool_content.strip().startswith(("{", "[")):
 								                                tool_content = json.loads(tool_content)
 								                        except (json.JSONDecodeError, AttributeError):
 								                            pass  # Keep as string if not valid JSON
 								                        tool_response += json.dumps({
 								                            "tool_call_id": tool_msg.get("tool_call_id", ""),
 								                            "name": msg["tool_calls"][len(tool_responses)]["function"]["name"] if len(tool_responses) < len(msg["tool_calls"]) else "unknown",
 								                            "content": tool_content
-												some cleanups

											
										
										
											2025-11-05 03:47:17 +00:00
+								                        }, ensure_ascii=False)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								                        tool_response += "\n</tool_response>"
 								                        tool_responses.append(tool_response)
 								                        j += 1
 								                    # Add all tool responses as a single message
 								                    if tool_responses:
 								                        trajectory.append({
 								                            "from": "tool",
 								                            "value": "\n".join(tool_responses)
 								                        })
 								                        i = j - 1  # Skip the tool messages we just processed
 								                else:
 								                    # Regular assistant message without tool calls
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                    # Add <think> tags around reasoning for trajectory storage
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
+								                    content = ""
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								                    # Prepend reasoning in <think> tags if available (native thinking tokens)
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
+								                    if msg.get("reasoning") and msg["reasoning"].strip():
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                        content = f"<think>\n{msg['reasoning']}\n</think>\n"
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								                    # Convert any <REASONING_SCRATCHPAD> tags to <think> tags
 								                    # (used when native thinking is disabled and model reasons via XML)
 								                    raw_content = msg["content"] or ""
-												refactor: streamline scratchpad handling in AIAgent

- Removed static methods for converting and checking <REASONING_SCRATCHPAD> tags, simplifying the codebase.
- Replaced calls to the removed methods with direct function calls for better clarity and maintainability.
- Updated trajectory saving logic to utilize a dedicated function for improved organization and readability.

											
										
										
											2026-02-23 09:55:09 -08:00
+								                    content += convert_scratchpad_to_think(raw_content)
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
-												Refactor BatchRunner and AIAgent for enhanced reasoning and tool management, improved tool definitions for fileops

- Updated `ALL_POSSIBLE_TOOLS` to auto-derive from `TOOL_TO_TOOLSET_MAP` for consistent schema.
- Introduced `_extract_reasoning_stats` function to track reasoning coverage in assistant turns.
- Enhanced `_process_batch_worker` to discard prompts with no reasoning and aggregate reasoning statistics.
- Updated documentation and comments for clarity on new features and changes.

											
										
										
											2026-02-08 20:19:14 +00:00
+								                    # Ensure every gpt turn has a <think> block (empty if no reasoning)
 								                    if "<think>" not in content:
 								                        content = "<think>\n</think>\n" + content
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								                    trajectory.append({
 								                        "from": "gpt",
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                        "value": content.strip()
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								                    })
 								            elif msg["role"] == "user":
 								                trajectory.append({
 								                    "from": "human",
 								                    "value": msg["content"]
 								                })
 								            i += 1
 								        return trajectory
 								    def _save_trajectory(self, messages: List[Dict[str, Any]], user_query: str, completed: bool):
 								        """
 								        Save conversation trajectory to JSONL file.
 								        Args:
 								            messages (List[Dict]): Complete message history
 								            user_query (str): Original user query
 								            completed (bool): Whether the conversation completed successfully
 								        """
 								        if not self.save_trajectories:
 								            return
 								        trajectory = self._convert_to_trajectory_format(messages, user_query, completed)
-												refactor: streamline scratchpad handling in AIAgent

- Removed static methods for converting and checking <REASONING_SCRATCHPAD> tags, simplifying the codebase.
- Replaced calls to the removed methods with direct function calls for better clarity and maintainability.
- Updated trajectory saving logic to utilize a dedicated function for improved organization and readability.

											
										
										
											2026-02-23 09:55:09 -08:00
+								        _save_trajectory_to_file(trajectory, self.model, completed)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
-												feat: add multi-provider authentication and inference provider selection

- Implemented a multi-provider authentication system for the Hermes Agent, supporting OAuth for Nous Portal and traditional API key methods for OpenRouter and custom endpoints.
- Enhanced CLI with commands for logging in and out of providers, allowing users to authenticate and manage their credentials easily.
- Updated configuration options to select inference providers, with detailed documentation on usage and setup.
- Improved status reporting to include authentication status and provider details, enhancing user awareness of their current configuration.
- Added new files for authentication handling and updated existing components to integrate the new provider system.

											
										
										
											2026-02-20 17:24:00 -08:00
+								    def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]:
 								        if not key:
 								            return None
 								        if len(key) <= 12:
 								            return "***"
 								        return f"{key[:8]}...{key[-4:]}"
 								    def _dump_api_request_debug(
 								        self,
 								        api_kwargs: Dict[str, Any],
 								        *,
 								        reason: str,
 								        error: Optional[Exception] = None,
 								    ) -> Optional[Path]:
 								        """
 								        Dump a debug-friendly HTTP request record for chat.completions.create().
 								        Captures the request body from api_kwargs (excluding transport-only keys
 								        like timeout). Intended for debugging provider-side 4xx failures where
 								        retries are not useful.
 								        """
 								        try:
 								            body = copy.deepcopy(api_kwargs)
 								            body.pop("timeout", None)
 								            body = {k: v for k, v in body.items() if v is not None}
 								            api_key = None
 								            try:
 								                api_key = getattr(self.client, "api_key", None)
-												refactor: enhance error handling with structured logging across multiple modules

- Updated various modules including cli.py, run_agent.py, gateway, and tools to replace silent exception handling with structured logging.
- Improved error messages to provide more context, aiding in debugging and monitoring.
- Ensured consistent logging practices throughout the codebase, enhancing traceability and maintainability.

											
										
										
											2026-02-21 03:32:11 -08:00
+								            except Exception as e:
 								                logger.debug("Could not extract API key for debug dump: %s", e)
-												feat: add multi-provider authentication and inference provider selection

- Implemented a multi-provider authentication system for the Hermes Agent, supporting OAuth for Nous Portal and traditional API key methods for OpenRouter and custom endpoints.
- Enhanced CLI with commands for logging in and out of providers, allowing users to authenticate and manage their credentials easily.
- Updated configuration options to select inference providers, with detailed documentation on usage and setup.
- Improved status reporting to include authentication status and provider details, enhancing user awareness of their current configuration.
- Added new files for authentication handling and updated existing components to integrate the new provider system.

											
										
										
											2026-02-20 17:24:00 -08:00
 								            dump_payload: Dict[str, Any] = {
 								                "timestamp": datetime.now().isoformat(),
 								                "session_id": self.session_id,
 								                "reason": reason,
 								                "request": {
 								                    "method": "POST",
 								                    "url": f"{self.base_url.rstrip('/')}/chat/completions",
 								                    "headers": {
 								                        "Authorization": f"Bearer {self._mask_api_key_for_logs(api_key)}",
 								                        "Content-Type": "application/json",
 								                    },
 								                    "body": body,
 								                },
 								            }
 								            if error is not None:
 								                error_info: Dict[str, Any] = {
 								                    "type": type(error).__name__,
 								                    "message": str(error),
 								                }
 								                for attr_name in ("status_code", "request_id", "code", "param", "type"):
 								                    attr_value = getattr(error, attr_name, None)
 								                    if attr_value is not None:
 								                        error_info[attr_name] = attr_value
 								                body_attr = getattr(error, "body", None)
 								                if body_attr is not None:
 								                    error_info["body"] = body_attr
 								                response_obj = getattr(error, "response", None)
 								                if response_obj is not None:
 								                    try:
 								                        error_info["response_status"] = getattr(response_obj, "status_code", None)
 								                        error_info["response_text"] = response_obj.text
-												refactor: enhance error handling with structured logging across multiple modules

- Updated various modules including cli.py, run_agent.py, gateway, and tools to replace silent exception handling with structured logging.
- Improved error messages to provide more context, aiding in debugging and monitoring.
- Ensured consistent logging practices throughout the codebase, enhancing traceability and maintainability.

											
										
										
											2026-02-21 03:32:11 -08:00
+								                    except Exception as e:
 								                        logger.debug("Could not extract error response details: %s", e)
-												feat: add multi-provider authentication and inference provider selection

- Implemented a multi-provider authentication system for the Hermes Agent, supporting OAuth for Nous Portal and traditional API key methods for OpenRouter and custom endpoints.
- Enhanced CLI with commands for logging in and out of providers, allowing users to authenticate and manage their credentials easily.
- Updated configuration options to select inference providers, with detailed documentation on usage and setup.
- Improved status reporting to include authentication status and provider details, enhancing user awareness of their current configuration.
- Added new files for authentication handling and updated existing components to integrate the new provider system.

											
										
										
											2026-02-20 17:24:00 -08:00
 								                dump_payload["error"] = error_info
 								            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
 								            dump_file = self.logs_dir / f"request_dump_{self.session_id}_{timestamp}.json"
 								            dump_file.write_text(
 								                json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str),
 								                encoding="utf-8",
 								            )
 								            print(f"{self.log_prefix}🧾 Request debug dump written to: {dump_file}")
 								            if os.getenv("HERMES_DUMP_REQUEST_STDOUT", "").strip().lower() in {"1", "true", "yes", "on"}:
 								                print(json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str))
 								            return dump_file
 								        except Exception as dump_error:
 								            if self.verbose_logging:
 								                logging.warning(f"Failed to dump API request debug payload: {dump_error}")
 								            return None
-												refactor: enhance session content handling in AIAgent and update TTS output path

- Introduced a new static method `_clean_session_content` in the `AIAgent` class to convert REASONING_SCRATCHPAD tags to <think> blocks and clean up whitespace in session logs.
- Updated the `_save_session_log` method to utilize the cleaned content for assistant messages, ensuring consistency in session logs.
- Changed the default output directory for TTS audio files from `~/voice-memos` to `~/.hermes/audio_cache`, reflecting a more appropriate storage location.

											
										
										
											2026-02-25 04:22:03 -08:00
+								    @staticmethod
 								    def _clean_session_content(content: str) -> str:
 								        """Normalise assistant text before it is written to the session log.

 								        REASONING_SCRATCHPAD markup is converted with convert_scratchpad_to_think,
 								        runs of newlines directly before <think> and directly after </think> are
 								        collapsed to a single newline, and surrounding whitespace is stripped.
 								        Falsy input is returned unchanged.
 								        """
 								        if content:
 								            converted = convert_scratchpad_to_think(content)
 								            # (pattern, replacement) pairs applied in order
 								            newline_fixes = (
 								                (r'\n+(<think>)', r'\n\1'),
 								                (r'(</think>)\n+', r'\1\n'),
 								            )
 								            for pattern, replacement in newline_fixes:
 								                converted = re.sub(pattern, replacement, converted)
 								            return converted.strip()
 								        return content
-												Enhance session logging and interactive sudo support

- Implemented automatic session logging, saving conversation trajectories to the `logs/` directory in JSON format, with each session having a unique identifier.
- Updated the CLI to display the session ID in the welcome banner for easy reference.
- Introduced an interactive sudo password prompt in CLI mode, allowing users to enter their password with a 45-second timeout, enhancing user experience during command execution.
- Documented session logging and interactive sudo features in `README.md`, `cli.md`, and `cli-config.yaml.example` for better user guidance.

											
										
										
											2026-02-01 15:36:26 -08:00
+								    def _save_session_log(self, messages: List[Dict[str, Any]] = None):
 								        """
-												refactor: remove temporary API payload logging and enhance session log structure

- Eliminated the `_log_api_payload` method used for temporary debugging, streamlining the codebase.
- Updated the `_save_session_log` method to save the full raw session, including all messages and metadata, improving the clarity and completeness of session logs.
- Adjusted session log entry to include additional context such as `base_url` and `platform` for better tracking.

											
										
										
											2026-02-21 01:26:37 -08:00
+								        Save the full raw session to a JSON file.
 								        Stores every message exactly as the agent sees it: user messages,
 								        assistant messages (with reasoning, finish_reason, tool_calls),
 								        tool responses (with tool_call_id, tool_name), and injected system
 								        messages (compression summaries, todo snapshots, etc.).
-												refactor: enhance session content handling in AIAgent and update TTS output path

- Introduced a new static method `_clean_session_content` in the `AIAgent` class to convert REASONING_SCRATCHPAD tags to <think> blocks and clean up whitespace in session logs.
- Updated the `_save_session_log` method to utilize the cleaned content for assistant messages, ensuring consistency in session logs.
- Changed the default output directory for TTS audio files from `~/voice-memos` to `~/.hermes/audio_cache`, reflecting a more appropriate storage location.

											
										
										
											2026-02-25 04:22:03 -08:00
+								        REASONING_SCRATCHPAD tags are converted to <think> blocks for consistency.
-												refactor: remove temporary API payload logging and enhance session log structure

- Eliminated the `_log_api_payload` method used for temporary debugging, streamlining the codebase.
- Updated the `_save_session_log` method to save the full raw session, including all messages and metadata, improving the clarity and completeness of session logs.
- Adjusted session log entry to include additional context such as `base_url` and `platform` for better tracking.

											
										
										
											2026-02-21 01:26:37 -08:00
+								        Overwritten after each turn so it always reflects the latest state.
-												Enhance session logging and interactive sudo support

- Implemented automatic session logging, saving conversation trajectories to the `logs/` directory in JSON format, with each session having a unique identifier.
- Updated the CLI to display the session ID in the welcome banner for easy reference.
- Introduced an interactive sudo password prompt in CLI mode, allowing users to enter their password with a 45-second timeout, enhancing user experience during command execution.
- Documented session logging and interactive sudo features in `README.md`, `cli.md`, and `cli-config.yaml.example` for better user guidance.

											
										
										
											2026-02-01 15:36:26 -08:00
+								        """
 								        messages = messages or self._session_messages
 								        if not messages:
 								            return
-												refactor: remove temporary API payload logging and enhance session log structure

- Eliminated the `_log_api_payload` method used for temporary debugging, streamlining the codebase.
- Updated the `_save_session_log` method to save the full raw session, including all messages and metadata, improving the clarity and completeness of session logs.
- Adjusted session log entry to include additional context such as `base_url` and `platform` for better tracking.

											
										
										
											2026-02-21 01:26:37 -08:00
-												Enhance session logging and interactive sudo support

- Implemented automatic session logging, saving conversation trajectories to the `logs/` directory in JSON format, with each session having a unique identifier.
- Updated the CLI to display the session ID in the welcome banner for easy reference.
- Introduced an interactive sudo password prompt in CLI mode, allowing users to enter their password with a 45-second timeout, enhancing user experience during command execution.
- Documented session logging and interactive sudo features in `README.md`, `cli.md`, and `cli-config.yaml.example` for better user guidance.

											
										
										
											2026-02-01 15:36:26 -08:00
+								        try:
-												refactor: enhance session content handling in AIAgent and update TTS output path

- Introduced a new static method `_clean_session_content` in the `AIAgent` class to convert REASONING_SCRATCHPAD tags to <think> blocks and clean up whitespace in session logs.
- Updated the `_save_session_log` method to utilize the cleaned content for assistant messages, ensuring consistency in session logs.
- Changed the default output directory for TTS audio files from `~/voice-memos` to `~/.hermes/audio_cache`, reflecting a more appropriate storage location.

											
										
										
											2026-02-25 04:22:03 -08:00
+								            # Clean assistant content for session logs
 								            cleaned = []
 								            for msg in messages:
 								                if msg.get("role") == "assistant" and msg.get("content"):
 								                    msg = dict(msg)
 								                    msg["content"] = self._clean_session_content(msg["content"])
 								                cleaned.append(msg)
-												Enhance session logging and interactive sudo support

- Implemented automatic session logging, saving conversation trajectories to the `logs/` directory in JSON format, with each session having a unique identifier.
- Updated the CLI to display the session ID in the welcome banner for easy reference.
- Introduced an interactive sudo password prompt in CLI mode, allowing users to enter their password with a 45-second timeout, enhancing user experience during command execution.
- Documented session logging and interactive sudo features in `README.md`, `cli.md`, and `cli-config.yaml.example` for better user guidance.

											
										
										
											2026-02-01 15:36:26 -08:00
+								            entry = {
 								                "session_id": self.session_id,
 								                "model": self.model,
-												refactor: remove temporary API payload logging and enhance session log structure

- Eliminated the `_log_api_payload` method used for temporary debugging, streamlining the codebase.
- Updated the `_save_session_log` method to save the full raw session, including all messages and metadata, improving the clarity and completeness of session logs.
- Adjusted session log entry to include additional context such as `base_url` and `platform` for better tracking.

											
										
										
											2026-02-21 01:26:37 -08:00
+								                "base_url": self.base_url,
 								                "platform": self.platform,
-												Enhance session logging and interactive sudo support

- Implemented automatic session logging, saving conversation trajectories to the `logs/` directory in JSON format, with each session having a unique identifier.
- Updated the CLI to display the session ID in the welcome banner for easy reference.
- Introduced an interactive sudo password prompt in CLI mode, allowing users to enter their password with a 45-second timeout, enhancing user experience during command execution.
- Documented session logging and interactive sudo features in `README.md`, `cli.md`, and `cli-config.yaml.example` for better user guidance.

											
										
										
											2026-02-01 15:36:26 -08:00
+								                "session_start": self.session_start.isoformat(),
 								                "last_updated": datetime.now().isoformat(),
-												feat(agent): include system prompt in agent status output

Added the system prompt to the AIAgent class's status output, ensuring that the current system prompt is included in the agent's status information. This enhancement improves visibility into the agent's configuration during runtime.

											
										
										
											2026-03-01 23:50:54 -08:00
+								                "system_prompt": self._cached_system_prompt or "",
-												feat(agent): include tools in agent status output

Added the tools attribute to the AIAgent class's status output, ensuring that the current tools used by the agent are included in the status information. This enhancement improves the visibility of the agent's capabilities during runtime.

											
										
										
											2026-03-02 00:13:41 -08:00
+								                "tools": self.tools or [],
-												refactor: enhance session content handling in AIAgent and update TTS output path

- Introduced a new static method `_clean_session_content` in the `AIAgent` class to convert REASONING_SCRATCHPAD tags to <think> blocks and clean up whitespace in session logs.
- Updated the `_save_session_log` method to utilize the cleaned content for assistant messages, ensuring consistency in session logs.
- Changed the default output directory for TTS audio files from `~/voice-memos` to `~/.hermes/audio_cache`, reflecting a more appropriate storage location.

											
										
										
											2026-02-25 04:22:03 -08:00
+								                "message_count": len(cleaned),
 								                "messages": cleaned,
-												Enhance session logging and interactive sudo support

- Implemented automatic session logging, saving conversation trajectories to the `logs/` directory in JSON format, with each session having a unique identifier.
- Updated the CLI to display the session ID in the welcome banner for easy reference.
- Introduced an interactive sudo password prompt in CLI mode, allowing users to enter their password with a 45-second timeout, enhancing user experience during command execution.
- Documented session logging and interactive sudo features in `README.md`, `cli.md`, and `cli-config.yaml.example` for better user guidance.

											
										
										
											2026-02-01 15:36:26 -08:00
+								            }
-												refactor: remove temporary API payload logging and enhance session log structure

- Eliminated the `_log_api_payload` method used for temporary debugging, streamlining the codebase.
- Updated the `_save_session_log` method to save the full raw session, including all messages and metadata, improving the clarity and completeness of session logs.
- Adjusted session log entry to include additional context such as `base_url` and `platform` for better tracking.

											
										
										
											2026-02-21 01:26:37 -08:00
-												Enhance session logging and interactive sudo support

- Implemented automatic session logging, saving conversation trajectories to the `logs/` directory in JSON format, with each session having a unique identifier.
- Updated the CLI to display the session ID in the welcome banner for easy reference.
- Introduced an interactive sudo password prompt in CLI mode, allowing users to enter their password with a 45-second timeout, enhancing user experience during command execution.
- Documented session logging and interactive sudo features in `README.md`, `cli.md`, and `cli-config.yaml.example` for better user guidance.

											
										
										
											2026-02-01 15:36:26 -08:00
+								            with open(self.session_log_file, "w", encoding="utf-8") as f:
-												refactor: remove temporary API payload logging and enhance session log structure

- Eliminated the `_log_api_payload` method used for temporary debugging, streamlining the codebase.
- Updated the `_save_session_log` method to save the full raw session, including all messages and metadata, improving the clarity and completeness of session logs.
- Adjusted session log entry to include additional context such as `base_url` and `platform` for better tracking.

											
										
										
											2026-02-21 01:26:37 -08:00
+								                json.dump(entry, f, indent=2, ensure_ascii=False, default=str)
-												Enhance session logging and interactive sudo support

- Implemented automatic session logging, saving conversation trajectories to the `logs/` directory in JSON format, with each session having a unique identifier.
- Updated the CLI to display the session ID in the welcome banner for easy reference.
- Introduced an interactive sudo password prompt in CLI mode, allowing users to enter their password with a 45-second timeout, enhancing user experience during command execution.
- Documented session logging and interactive sudo features in `README.md`, `cli.md`, and `cli-config.yaml.example` for better user guidance.

											
										
										
											2026-02-01 15:36:26 -08:00
+								        except Exception as e:
 								            if self.verbose_logging:
 								                logging.warning(f"Failed to save session log: {e}")
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								    def interrupt(self, message: str = None) -> None:
 								        """
 								        Request the agent to interrupt its current tool-calling loop.
 								        Call this from another thread (e.g., input handler, message receiver)
 								        to gracefully stop the agent and process a new message.
-												Implement interrupt handling for long-running tool executions in AIAgent

- Added functionality to signal and terminate long-running terminal commands when a new user message is received, allowing for immediate agent response.
- Introduced a global interrupt event in the terminal tool to facilitate early termination of subprocesses.
- Updated the AIAgent class to handle interrupts gracefully, ensuring that remaining tool calls are skipped and appropriate messages are returned to maintain valid message sequences.

											
										
										
											2026-02-10 16:34:27 -08:00
+								        Also signals long-running tool executions (e.g. terminal commands)
 								        to terminate early, so the agent can respond immediately.
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								        Args:
 								            message: Optional new message that triggered the interrupt.
 								                     If provided, the agent will include this in its response context.
 								        Example (CLI):
 								            # In a separate input thread:
 								            if user_typed_something:
 								                agent.interrupt(user_input)
 								        Example (Messaging):
 								            # When new message arrives for active session:
 								            if session_has_running_agent:
 								                running_agent.interrupt(new_message.text)
 								        """
 								        self._interrupt_requested = True
 								        self._interrupt_message = message
-												feat: enhance interrupt handling and container resource configuration

- Introduced a shared interrupt signaling mechanism to allow tools to check for user interrupts during long-running operations.
- Updated the AIAgent to handle interrupts more effectively, ensuring in-progress tool calls are canceled and multiple interrupt messages are combined into one prompt.
- Enhanced the CLI configuration to include container resource limits (CPU, memory, disk) and persistence options for Docker, Singularity, and Modal environments.
- Improved documentation to clarify interrupt behaviors and container resource settings, providing users with better guidance on configuration and usage.

											
										
										
											2026-02-23 02:11:33 -08:00
+								        # Signal all tools to abort any in-flight operations immediately
 								        _set_interrupt(True)
-												feat: implement subagent delegation for task management

- Introduced the `delegate_task` tool, allowing the main agent to spawn child AIAgent instances with isolated context for complex tasks.
- Supported both single-task and batch processing (up to 3 concurrent tasks) to enhance task management capabilities.
- Updated configuration options for delegation, including maximum iterations and default toolsets for subagents.
- Enhanced documentation to provide clear guidance on using the delegation feature and its configuration.
- Added comprehensive tests to ensure the functionality and reliability of the delegation logic.

											
										
										
											2026-02-20 03:15:53 -08:00
+								        # Propagate interrupt to any running child agents (subagent delegation)
 								        for child in self._active_children:
 								            try:
 								                child.interrupt(message)
-												refactor: enhance error handling with structured logging across multiple modules

- Updated various modules including cli.py, run_agent.py, gateway, and tools to replace silent exception handling with structured logging.
- Improved error messages to provide more context, aiding in debugging and monitoring.
- Ensured consistent logging practices throughout the codebase, enhancing traceability and maintainability.

											
										
										
											2026-02-21 03:32:11 -08:00
+								            except Exception as e:
 								                logger.debug("Failed to propagate interrupt to child agent: %s", e)
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								        if not self.quiet_mode:
 								            print(f"\n⚡ Interrupt requested" + (f": '{message[:40]}...'" if message and len(message) > 40 else f": '{message}'" if message else ""))
 								    def clear_interrupt(self) -> None:
 								        """Reset the interrupt state.

 								        Drops the pending interrupt flag and its message on this agent, then
 								        switches the global tool interrupt signal off via _set_interrupt(False).
 								        """
 								        self._interrupt_requested, self._interrupt_message = False, None
 								        _set_interrupt(False)
-												Add todo tool for agent task planning and management

Single `todo` tool that reads (no params) or writes (provide todos array
with merge flag). In-memory TodoStore on AIAgent, no system prompt
mutation, behavioral guidance in tool description only. State re-injected
after context compression events. Gateway sessions hydrate from
conversation history. Added to all platform toolsets.

Also wired into RL agent_loop.py with per-run TodoStore and fixed
browser_snapshot user_task passthrough from first user message.

											
										
										
											2026-02-17 17:02:33 -08:00
 								    def _hydrate_todo_store(self, history: List[Dict[str, Any]]) -> None:
 								        """
 								        Recover todo state from conversation history.
 								        The gateway creates a fresh AIAgent per message, so the in-memory
 								        TodoStore is empty. We scan the history for the most recent todo
 								        tool response and replay it to reconstruct the state.
 								        """
 								        # Walk history backwards to find the most recent todo tool response
 								        last_todo_response = None
 								        for msg in reversed(history):
 								            if msg.get("role") != "tool":
 								                continue
 								            content = msg.get("content", "")
 								            # Quick check: todo responses contain "todos" key
 								            if '"todos"' not in content:
 								                continue
 								            try:
 								                data = json.loads(content)
 								                if "todos" in data and isinstance(data["todos"], list):
 								                    last_todo_response = data["todos"]
 								                    break
 								            except (json.JSONDecodeError, TypeError):
 								                continue
 								        if last_todo_response:
 								            # Replay the items into the store (replace mode)
 								            self._todo_store.write(last_todo_response, merge=False)
 								            if not self.quiet_mode:
 								                print(f"{self.log_prefix}📋 Restored {len(last_todo_response)} todo item(s) from history")
-												feat: enhance interrupt handling and container resource configuration

- Introduced a shared interrupt signaling mechanism to allow tools to check for user interrupts during long-running operations.
- Updated the AIAgent to handle interrupts more effectively, ensuring in-progress tool calls are canceled and multiple interrupt messages are combined into one prompt.
- Enhanced the CLI configuration to include container resource limits (CPU, memory, disk) and persistence options for Docker, Singularity, and Modal environments.
- Improved documentation to clarify interrupt behaviors and container resource settings, providing users with better guidance on configuration and usage.

											
										
										
											2026-02-23 02:11:33 -08:00
+								        _set_interrupt(False)
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
 								    @property
 								    def is_interrupted(self) -> bool:
 								        """Report whether an interrupt request is currently pending.
 								        Returns:
 								            The current value of ``self._interrupt_requested``.
 								        """
 								        pending = self._interrupt_requested
 								        return pending
-												feat: add Honcho AI-native memory integration

Opt-in persistent cross-session user modeling via Honcho. Reads
~/.honcho/config.json as single source of truth (shared with
Claude Code, Cursor, and other Honcho-enabled tools). Zero impact
when disabled or unconfigured.

- honcho_integration/ package (client, session manager, peer resolution)
- Host-based config resolution matching claude-honcho/cursor-honcho pattern
- Prefetch user context into system prompt per conversation turn
- Sync user/assistant messages to Honcho after each exchange
- query_user_context tool for mid-conversation dialectic reasoning
- Gated activation: requires ~/.honcho/config.json with enabled=true

											
										
										
											2026-02-25 19:34:25 -05:00
 								    # ── Honcho integration helpers ──
 								    def _honcho_prefetch(self, user_message: str) -> str:
 								        """Fetch user context from Honcho for system prompt injection.
 								        Returns a formatted context block, or empty string if unavailable.
 								        """
 								        if not self._honcho or not self._honcho_session_key:
 								            return ""
 								        try:
 								            ctx = self._honcho.get_prefetch_context(self._honcho_session_key, user_message)
 								            if not ctx:
 								                return ""
 								            parts = []
 								            rep = ctx.get("representation", "")
 								            card = ctx.get("card", "")
 								            if rep:
 								                parts.append(rep)
 								            if card:
 								                parts.append(card)
 								            if not parts:
 								                return ""
 								            return "# Honcho User Context\n" + "\n\n".join(parts)
 								        except Exception as e:
 								            logger.debug("Honcho prefetch failed (non-fatal): %s", e)
 								            return ""
-												feat: integrate Honcho with USER.md memory system

When Honcho is active:
- System prompt uses Honcho prefetch instead of USER.md
- memory tool target=user add routes to Honcho
- MEMORY.md untouched in all cases

When disabled, everything works as before.

Also wires up contextTokens config to cap prefetch size.

											
										
										
											2026-02-26 18:02:24 -05:00
+								    def _honcho_save_user_observation(self, content: str) -> str:
 								        """Route a memory tool target=user add to Honcho.
 								        Sends the content as a user peer message so Honcho's reasoning
 								        model can incorporate it into the user representation.
 								        """
 								        if not content or not content.strip():
 								            return json.dumps({"success": False, "error": "Content cannot be empty."})
 								        try:
 								            session = self._honcho.get_or_create(self._honcho_session_key)
 								            session.add_message("user", f"[observation] {content.strip()}")
 								            self._honcho.save(session)
 								            return json.dumps({
 								                "success": True,
 								                "target": "user",
 								                "message": "Saved to Honcho user model.",
 								            })
 								        except Exception as e:
 								            logger.debug("Honcho user observation failed: %s", e)
 								            return json.dumps({"success": False, "error": f"Honcho save failed: {e}"})
-												feat: add Honcho AI-native memory integration

Opt-in persistent cross-session user modeling via Honcho. Reads
~/.honcho/config.json as single source of truth (shared with
Claude Code, Cursor, and other Honcho-enabled tools). Zero impact
when disabled or unconfigured.

- honcho_integration/ package (client, session manager, peer resolution)
- Host-based config resolution matching claude-honcho/cursor-honcho pattern
- Prefetch user context into system prompt per conversation turn
- Sync user/assistant messages to Honcho after each exchange
- query_user_context tool for mid-conversation dialectic reasoning
- Gated activation: requires ~/.honcho/config.json with enabled=true

											
										
										
											2026-02-25 19:34:25 -05:00
+								    def _honcho_sync(self, user_content: str, assistant_content: str) -> None:
 								        """Sync the user/assistant message pair to Honcho."""
 								        if not self._honcho or not self._honcho_session_key:
 								            return
 								        try:
 								            session = self._honcho.get_or_create(self._honcho_session_key)
 								            session.add_message("user", user_content)
 								            session.add_message("assistant", assistant_content)
 								            self._honcho.save(session)
 								        except Exception as e:
 								            logger.debug("Honcho sync failed (non-fatal): %s", e)
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								    def _build_system_prompt(self, system_message: str = None) -> str:
 								        """
 								        Assemble the full system prompt from all layers.
 								        Called once per session (cached on self._cached_system_prompt) and only
 								        rebuilt after context compression events. This ensures the system prompt
 								        is stable across all turns in a session, maximizing prefix cache hits.
 								        """
 								        # Layers (in order):
 								        #   1. Default agent identity (always present)
 								        #   2. User / gateway system prompt (if provided)
 								        #   3. Persistent memory (frozen snapshot)
 								        #   4. Skills guidance (if skills tools are loaded)
 								        #   5. Context files (SOUL.md, AGENTS.md, .cursorrules)
 								        #   6. Current date & time (frozen at build time)
 								        #   7. Platform-specific formatting hint
 								        prompt_parts = [DEFAULT_AGENT_IDENTITY]
-												Enhance agent guidance with memory and session search tools

- Introduced MEMORY_GUIDANCE and SESSION_SEARCH_GUIDANCE to improve agent's contextual awareness and proactive assistance.
- Updated AIAgent to conditionally include tool-aware guidance in prompts based on available tools.
- Enhanced descriptions in memory and session search schemas for clearer user instructions on when to utilize these features.

											
										
										
											2026-02-22 02:31:52 -08:00
+								        # Tool-aware behavioral guidance: only inject when the tools are loaded
 								        tool_guidance = []
 								        if "memory" in self.valid_tool_names:
 								            tool_guidance.append(MEMORY_GUIDANCE)
 								        if "session_search" in self.valid_tool_names:
 								            tool_guidance.append(SESSION_SEARCH_GUIDANCE)
-												feat: introduce skills management features in AIAgent and CLI

- Added skills configuration options in cli-config.yaml.example, including a nudge interval for skill creation reminders.
- Implemented skills guidance in AIAgent to prompt users to save reusable workflows after complex tasks.
- Enhanced skills indexing in the prompt builder to include descriptions from SKILL.md files for better context.
- Updated the agent's behavior to periodically remind users about potential skills during tool-calling iterations.

											
										
										
											2026-02-22 13:28:13 -08:00
+								        if "skill_manage" in self.valid_tool_names:
 								            tool_guidance.append(SKILLS_GUIDANCE)
-												Enhance agent guidance with memory and session search tools

- Introduced MEMORY_GUIDANCE and SESSION_SEARCH_GUIDANCE to improve agent's contextual awareness and proactive assistance.
- Updated AIAgent to conditionally include tool-aware guidance in prompts based on available tools.
- Enhanced descriptions in memory and session search schemas for clearer user instructions on when to utilize these features.

											
										
										
											2026-02-22 02:31:52 -08:00
+								        if tool_guidance:
 								            prompt_parts.append(" ".join(tool_guidance))
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								        # Note: ephemeral_system_prompt is NOT included here. It's injected at
 								        # API-call time only so it stays out of the cached/stored system prompt.
 								        if system_message is not None:
 								            prompt_parts.append(system_message)
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
 								        if self._memory_store:
 								            if self._memory_enabled:
 								                mem_block = self._memory_store.format_for_system_prompt("memory")
 								                if mem_block:
 								                    prompt_parts.append(mem_block)
-												refactor: run Honcho and USER.md in tandem

USER.md stays in system prompt when Honcho is active -- prefetch is
additive context, not a replacement. Memory tool user observations
write to both USER.md (local) and Honcho (cross-session) simultaneously.

											
										
										
											2026-02-26 18:07:33 -05:00
+								            # USER.md is always included when enabled -- Honcho prefetch is additive.
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								            if self._user_profile_enabled:
 								                user_block = self._memory_store.format_for_system_prompt("user")
 								                if user_block:
 								                    prompt_parts.append(user_block)
-												feat: introduce skill management tool for agent-created skills and skills migration to ~/.hermes

- Added a new `skill_manager_tool` to enable agents to create, update, and delete their own skills, enhancing procedural memory capabilities.
- Updated the skills directory structure to support user-created skills in `~/.hermes/skills/`, allowing for better organization and management.
- Enhanced the CLI and documentation to reflect the new skill management functionalities, including detailed instructions on creating and modifying skills.
- Implemented a manifest-based syncing mechanism for bundled skills to ensure user modifications are preserved during updates.

											
										
										
											2026-02-19 18:25:53 -08:00
+								        has_skills_tools = any(name in self.valid_tool_names for name in ['skills_list', 'skill_view', 'skill_manage'])
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        skills_prompt = build_skills_system_prompt() if has_skills_tools else ""
 								        if skills_prompt:
 								            prompt_parts.append(skills_prompt)
 								        if not self.skip_context_files:
 								            context_files_prompt = build_context_files_prompt()
 								            if context_files_prompt:
 								                prompt_parts.append(context_files_prompt)
-												fix(timezone): add timezone-aware clock across agent, cron, and execute_code

											
										
										
											2026-03-03 11:57:18 +05:30
+								        from hermes_time import now as _hermes_now
 								        now = _hermes_now()
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        prompt_parts.append(
 								            f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
 								        )
 								        platform_key = (self.platform or "").lower().strip()
 								        if platform_key in PLATFORM_HINTS:
 								            prompt_parts.append(PLATFORM_HINTS[platform_key])
 								        return "\n\n".join(prompt_parts)
-												fix: tool call repair — auto-lowercase, fuzzy match, helpful error on unknown tool (#520)

- Add _repair_tool_call(): tries lowercase, normalize, then fuzzy match (difflib 0.7)
- Replace 3-retry-then-abort with graceful error: model receives helpful message and self-corrects
- Conversation stays alive instead of dying on hallucinated tool names

Closes #520

											
										
										
											2026-03-08 18:39:43 +03:00
+								    def _repair_tool_call(self, tool_name: str) -> str | None:
 								        """Attempt to repair a mismatched tool name before aborting.
 . Try lowercase
 . Try normalized (lowercase + hyphens/spaces -> underscores)
 . Try fuzzy match (difflib, cutoff=0.7)
 								        Returns the repaired name if found in valid_tool_names, else None.
 								        """
 								        from difflib import get_close_matches
 								        # 1. Lowercase
 								        lowered = tool_name.lower()
 								        if lowered in self.valid_tool_names:
 								            return lowered
 								        # 2. Normalize
 								        normalized = lowered.replace("-", "_").replace(" ", "_")
 								        if normalized in self.valid_tool_names:
 								            return normalized
 								        # 3. Fuzzy match
 								        matches = get_close_matches(lowered, self.valid_tool_names, n=1, cutoff=0.7)
 								        if matches:
 								            return matches[0]
 								        return None
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								    def _invalidate_system_prompt(self):
 								        """
 								        Invalidate the cached system prompt, forcing a rebuild on the next turn.
 								        Called after context compression events. Also reloads memory from disk
 								        so the rebuilt prompt captures any writes from this session.
 								        """
 								        self._cached_system_prompt = None
 								        if self._memory_store:
 								            self._memory_store.load_from_disk()
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								    def _responses_tools(self, tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
 								        """Convert chat-completions tool schemas to Responses function-tool schemas."""
 								        source_tools = tools if tools is not None else self.tools
 								        if not source_tools:
 								            return None
 								        converted: List[Dict[str, Any]] = []
 								        for item in source_tools:
 								            fn = item.get("function", {}) if isinstance(item, dict) else {}
 								            name = fn.get("name")
 								            if not isinstance(name, str) or not name.strip():
 								                continue
 								            converted.append({
 								                "type": "function",
 								                "name": name,
 								                "description": fn.get("description", ""),
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								                "strict": False,
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                "parameters": fn.get("parameters", {"type": "object", "properties": {}}),
 								            })
 								        return converted or None
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								    @staticmethod
 								    def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]:
 								        """Split a stored tool id into (call_id, response_item_id)."""
 								        if not isinstance(raw_id, str):
 								            return None, None
 								        value = raw_id.strip()
 								        if not value:
 								            return None, None
 								        if "|" in value:
 								            call_id, response_item_id = value.split("|", 1)
 								            call_id = call_id.strip() or None
 								            response_item_id = response_item_id.strip() or None
 								            return call_id, response_item_id
 								        if value.startswith("fc_"):
 								            return None, value
 								        return value, None
 								    def _derive_responses_function_call_id(
 								        self,
 								        call_id: str,
 								        response_item_id: Optional[str] = None,
 								    ) -> str:
 								        """Build a valid Responses `function_call.id` (must start with `fc_`)."""
 								        if isinstance(response_item_id, str):
 								            candidate = response_item_id.strip()
 								            if candidate.startswith("fc_"):
 								                return candidate
 								        source = (call_id or "").strip()
 								        if source.startswith("fc_"):
 								            return source
 								        if source.startswith("call_") and len(source) > len("call_"):
 								            return f"fc_{source[len('call_'):]}"
 								        sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source)
 								        if sanitized.startswith("fc_"):
 								            return sanitized
 								        if sanitized.startswith("call_") and len(sanitized) > len("call_"):
 								            return f"fc_{sanitized[len('call_'):]}"
 								        if sanitized:
 								            return f"fc_{sanitized[:48]}"
 								        seed = source or str(response_item_id or "") or uuid.uuid4().hex
 								        digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24]
 								        return f"fc_{digest}"
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								    def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
 								        """Convert internal chat-style messages to Responses input items."""
 								        items: List[Dict[str, Any]] = []
 								        for msg in messages:
 								            if not isinstance(msg, dict):
 								                continue
 								            role = msg.get("role")
 								            if role == "system":
 								                continue
 								            if role in {"user", "assistant"}:
 								                content = msg.get("content", "")
 								                content_text = str(content) if content is not None else ""
 								                if role == "assistant":
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								                    # Replay encrypted reasoning items from previous turns
 								                    # so the API can maintain coherent reasoning chains.
 								                    codex_reasoning = msg.get("codex_reasoning_items")
 								                    if isinstance(codex_reasoning, list):
 								                        for ri in codex_reasoning:
 								                            if isinstance(ri, dict) and ri.get("encrypted_content"):
 								                                items.append(ri)
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                    if content_text.strip():
 								                        items.append({"role": "assistant", "content": content_text})
 								                    tool_calls = msg.get("tool_calls")
 								                    if isinstance(tool_calls, list):
 								                        for tc in tool_calls:
 								                            if not isinstance(tc, dict):
 								                                continue
 								                            fn = tc.get("function", {})
 								                            fn_name = fn.get("name")
 								                            if not isinstance(fn_name, str) or not fn_name.strip():
 								                                continue
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								                            embedded_call_id, embedded_response_item_id = self._split_responses_tool_id(
 								                                tc.get("id")
 								                            )
 								                            call_id = tc.get("call_id")
 								                            if not isinstance(call_id, str) or not call_id.strip():
 								                                call_id = embedded_call_id
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                            if not isinstance(call_id, str) or not call_id.strip():
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								                                if (
 								                                    isinstance(embedded_response_item_id, str)
 								                                    and embedded_response_item_id.startswith("fc_")
 								                                    and len(embedded_response_item_id) > len("fc_")
 								                                ):
 								                                    call_id = f"call_{embedded_response_item_id[len('fc_'):]}"
 								                                else:
 								                                    call_id = f"call_{uuid.uuid4().hex[:12]}"
 								                            call_id = call_id.strip()
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                            arguments = fn.get("arguments", "{}")
 								                            if isinstance(arguments, dict):
 								                                arguments = json.dumps(arguments, ensure_ascii=False)
 								                            elif not isinstance(arguments, str):
 								                                arguments = str(arguments)
 								                            arguments = arguments.strip() or "{}"
 								                            items.append({
 								                                "type": "function_call",
 								                                "call_id": call_id,
 								                                "name": fn_name,
 								                                "arguments": arguments,
 								                            })
 								                    continue
 								                items.append({"role": role, "content": content_text})
 								                continue
 								            if role == "tool":
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								                raw_tool_call_id = msg.get("tool_call_id")
 								                call_id, _ = self._split_responses_tool_id(raw_tool_call_id)
 								                if not isinstance(call_id, str) or not call_id.strip():
 								                    if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip():
 								                        call_id = raw_tool_call_id.strip()
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                if not isinstance(call_id, str) or not call_id.strip():
 								                    continue
 								                items.append({
 								                    "type": "function_call_output",
 								                    "call_id": call_id,
 								                    "output": str(msg.get("content", "") or ""),
 								                })
 								        return items
-												Harden Codex auth refresh and responses compatibility

											
										
										
											2026-02-25 19:27:54 -08:00
+								    def _preflight_codex_input_items(self, raw_items: Any) -> List[Dict[str, Any]]:
 								        if not isinstance(raw_items, list):
 								            raise ValueError("Codex Responses input must be a list of input items.")
 								        normalized: List[Dict[str, Any]] = []
 								        for idx, item in enumerate(raw_items):
 								            if not isinstance(item, dict):
 								                raise ValueError(f"Codex Responses input[{idx}] must be an object.")
 								            item_type = item.get("type")
 								            if item_type == "function_call":
 								                call_id = item.get("call_id")
 								                name = item.get("name")
 								                if not isinstance(call_id, str) or not call_id.strip():
 								                    raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.")
 								                if not isinstance(name, str) or not name.strip():
 								                    raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.")
 								                arguments = item.get("arguments", "{}")
 								                if isinstance(arguments, dict):
 								                    arguments = json.dumps(arguments, ensure_ascii=False)
 								                elif not isinstance(arguments, str):
 								                    arguments = str(arguments)
 								                arguments = arguments.strip() or "{}"
 								                normalized.append(
 								                    {
 								                        "type": "function_call",
 								                        "call_id": call_id.strip(),
 								                        "name": name.strip(),
 								                        "arguments": arguments,
 								                    }
 								                )
 								                continue
 								            if item_type == "function_call_output":
 								                call_id = item.get("call_id")
 								                if not isinstance(call_id, str) or not call_id.strip():
 								                    raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.")
 								                output = item.get("output", "")
 								                if output is None:
 								                    output = ""
 								                if not isinstance(output, str):
 								                    output = str(output)
 								                normalized.append(
 								                    {
 								                        "type": "function_call_output",
 								                        "call_id": call_id.strip(),
 								                        "output": output,
 								                    }
 								                )
 								                continue
-												feat(agent): enhance reasoning handling and configuration

Added support for processing encrypted reasoning content within the AIAgent class. Introduced logic to determine reasoning effort and enable/disable reasoning based on configuration settings. Updated the kwargs to reflect these changes, ensuring proper handling of reasoning parameters during agent execution.

											
										
										
											2026-03-01 16:15:20 -08:00
+								            if item_type == "reasoning":
 								                encrypted = item.get("encrypted_content")
 								                if isinstance(encrypted, str) and encrypted:
 								                    reasoning_item = {"type": "reasoning", "encrypted_content": encrypted}
 								                    item_id = item.get("id")
 								                    if isinstance(item_id, str) and item_id:
 								                        reasoning_item["id"] = item_id
-												feat(agent): add summary handling for reasoning items

Enhanced the AIAgent class to capture and normalize summary information for reasoning items. Implemented logic to handle summaries as lists, ensuring proper formatting for API interactions. Updated tests to validate the inclusion of summaries in reasoning items, both for existing and default cases.

											
										
										
											2026-03-01 20:03:03 -08:00
+								                    summary = item.get("summary")
 								                    if isinstance(summary, list):
 								                        reasoning_item["summary"] = summary
 								                    else:
 								                        reasoning_item["summary"] = []
-												feat(agent): enhance reasoning handling and configuration

Added support for processing encrypted reasoning content within the AIAgent class. Introduced logic to determine reasoning effort and enable/disable reasoning based on configuration settings. Updated the kwargs to reflect these changes, ensuring proper handling of reasoning parameters during agent execution.

											
										
										
											2026-03-01 16:15:20 -08:00
+								                    normalized.append(reasoning_item)
 								                continue
-												Harden Codex auth refresh and responses compatibility

											
										
										
											2026-02-25 19:27:54 -08:00
+								            role = item.get("role")
 								            if role in {"user", "assistant"}:
 								                content = item.get("content", "")
 								                if content is None:
 								                    content = ""
 								                if not isinstance(content, str):
 								                    content = str(content)
 								                normalized.append({"role": role, "content": content})
 								                continue
 								            raise ValueError(
 								                f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})."
 								            )
 								        return normalized
 								    def _preflight_codex_api_kwargs(
 								        self,
 								        api_kwargs: Any,
 								        *,
 								        allow_stream: bool = False,
 								    ) -> Dict[str, Any]:
 								        if not isinstance(api_kwargs, dict):
 								            raise ValueError("Codex Responses request must be a dict.")
 								        required = {"model", "instructions", "input"}
 								        missing = [key for key in required if key not in api_kwargs]
 								        if missing:
 								            raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.")
 								        model = api_kwargs.get("model")
 								        if not isinstance(model, str) or not model.strip():
 								            raise ValueError("Codex Responses request 'model' must be a non-empty string.")
 								        model = model.strip()
 								        instructions = api_kwargs.get("instructions")
 								        if instructions is None:
 								            instructions = ""
 								        if not isinstance(instructions, str):
 								            instructions = str(instructions)
 								        instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY
 								        normalized_input = self._preflight_codex_input_items(api_kwargs.get("input"))
 								        tools = api_kwargs.get("tools")
 								        normalized_tools = None
 								        if tools is not None:
 								            if not isinstance(tools, list):
 								                raise ValueError("Codex Responses request 'tools' must be a list when provided.")
 								            normalized_tools = []
 								            for idx, tool in enumerate(tools):
 								                if not isinstance(tool, dict):
 								                    raise ValueError(f"Codex Responses tools[{idx}] must be an object.")
 								                if tool.get("type") != "function":
 								                    raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.")
 								                name = tool.get("name")
 								                parameters = tool.get("parameters")
 								                if not isinstance(name, str) or not name.strip():
 								                    raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.")
 								                if not isinstance(parameters, dict):
 								                    raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.")
 								                description = tool.get("description", "")
 								                if description is None:
 								                    description = ""
 								                if not isinstance(description, str):
 								                    description = str(description)
 								                strict = tool.get("strict", False)
 								                if not isinstance(strict, bool):
 								                    strict = bool(strict)
 								                normalized_tools.append(
 								                    {
 								                        "type": "function",
 								                        "name": name.strip(),
 								                        "description": description,
 								                        "strict": strict,
 								                        "parameters": parameters,
 								                    }
 								                )
 								        store = api_kwargs.get("store", False)
 								        if store is not False:
 								            raise ValueError("Codex Responses contract requires 'store' to be false.")
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								        allowed_keys = {
 								            "model", "instructions", "input", "tools", "store",
 								            "reasoning", "include", "max_output_tokens", "temperature",
-												fix: allow tool_choice, parallel_tool_calls, prompt_cache_key in codex preflight

_preflight_codex_api_kwargs rejected these three fields as unsupported,
but _build_api_kwargs adds them to every codex request. This caused a
ValueError before _interruptible_api_call was reached, which was caught
by the retry loop and retried with exponential backoff — appearing as
an infinite hang in tests (275s total backoff across 6 retries).

The fix adds these keys to allowed_keys and passes them through to the
normalized request dict.

This fixes the hanging test_cron_run_job_codex_path_handles_internal_401_refresh
test (now passes in 2.6s instead of timing out).

											
										
										
											2026-03-11 07:00:14 -07:00
+								            "tool_choice", "parallel_tool_calls", "prompt_cache_key",
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								        }
-												Harden Codex auth refresh and responses compatibility

											
										
										
											2026-02-25 19:27:54 -08:00
+								        normalized: Dict[str, Any] = {
 								            "model": model,
 								            "instructions": instructions,
 								            "input": normalized_input,
 								            "tools": normalized_tools,
 								            "store": False,
 								        }
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								        # Pass through reasoning config
 								        reasoning = api_kwargs.get("reasoning")
 								        if isinstance(reasoning, dict):
 								            normalized["reasoning"] = reasoning
 								        include = api_kwargs.get("include")
 								        if isinstance(include, list):
 								            normalized["include"] = include
 								        # Pass through max_output_tokens and temperature
 								        max_output_tokens = api_kwargs.get("max_output_tokens")
 								        if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0:
 								            normalized["max_output_tokens"] = int(max_output_tokens)
 								        temperature = api_kwargs.get("temperature")
 								        if isinstance(temperature, (int, float)):
 								            normalized["temperature"] = float(temperature)
-												fix: allow tool_choice, parallel_tool_calls, prompt_cache_key in codex preflight

_preflight_codex_api_kwargs rejected these three fields as unsupported,
but _build_api_kwargs adds them to every codex request. This caused a
ValueError before _interruptible_api_call was reached, which was caught
by the retry loop and retried with exponential backoff — appearing as
an infinite hang in tests (275s total backoff across 6 retries).

The fix adds these keys to allowed_keys and passes them through to the
normalized request dict.

This fixes the hanging test_cron_run_job_codex_path_handles_internal_401_refresh
test (now passes in 2.6s instead of timing out).

											
										
										
											2026-03-11 07:00:14 -07:00
+								        # Pass through tool_choice, parallel_tool_calls, prompt_cache_key
 								        for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"):
 								            val = api_kwargs.get(passthrough_key)
 								            if val is not None:
 								                normalized[passthrough_key] = val
-												Harden Codex auth refresh and responses compatibility

											
										
										
											2026-02-25 19:27:54 -08:00
+								        if allow_stream:
 								            stream = api_kwargs.get("stream")
 								            if stream is not None and stream is not True:
 								                raise ValueError("Codex Responses 'stream' must be true when set.")
 								            if stream is True:
 								                normalized["stream"] = True
 								            allowed_keys.add("stream")
 								        elif "stream" in api_kwargs:
 								            raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.")
 								        unexpected = sorted(key for key in api_kwargs.keys() if key not in allowed_keys)
 								        if unexpected:
 								            raise ValueError(
 								                f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}."
 								            )
 								        return normalized
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								    def _extract_responses_message_text(self, item: Any) -> str:
 								        """Extract assistant text from a Responses message output item."""
 								        content = getattr(item, "content", None)
 								        if not isinstance(content, list):
 								            return ""
 								        chunks: List[str] = []
 								        for part in content:
 								            ptype = getattr(part, "type", None)
 								            if ptype not in {"output_text", "text"}:
 								                continue
 								            text = getattr(part, "text", None)
 								            if isinstance(text, str) and text:
 								                chunks.append(text)
 								        return "".join(chunks).strip()
 								    def _extract_responses_reasoning_text(self, item: Any) -> str:
 								        """Extract a compact reasoning text from a Responses reasoning item."""
 								        summary = getattr(item, "summary", None)
 								        if isinstance(summary, list):
 								            chunks: List[str] = []
 								            for part in summary:
 								                text = getattr(part, "text", None)
 								                if isinstance(text, str) and text:
 								                    chunks.append(text)
 								            if chunks:
 								                return "\n".join(chunks).strip()
 								        text = getattr(item, "text", None)
 								        if isinstance(text, str) and text:
 								            return text.strip()
 								        return ""
 								    def _normalize_codex_response(self, response: Any) -> tuple[Any, str]:
 								        """Normalize a Responses API object to an assistant_message-like object."""
 								        output = getattr(response, "output", None)
 								        if not isinstance(output, list) or not output:
 								            raise RuntimeError("Responses API returned no output items")
 								        response_status = getattr(response, "status", None)
 								        if isinstance(response_status, str):
 								            response_status = response_status.strip().lower()
 								        else:
 								            response_status = None
 								        if response_status in {"failed", "cancelled"}:
 								            error_obj = getattr(response, "error", None)
 								            if isinstance(error_obj, dict):
 								                error_msg = error_obj.get("message") or str(error_obj)
 								            else:
 								                error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'"
 								            raise RuntimeError(error_msg)
 								        content_parts: List[str] = []
 								        reasoning_parts: List[str] = []
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								        reasoning_items_raw: List[Dict[str, Any]] = []
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								        tool_calls: List[Any] = []
 								        has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								        saw_commentary_phase = False
 								        saw_final_answer_phase = False
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
 								        for item in output:
 								            item_type = getattr(item, "type", None)
 								            item_status = getattr(item, "status", None)
 								            if isinstance(item_status, str):
 								                item_status = item_status.strip().lower()
 								            else:
 								                item_status = None
 								            if item_status in {"queued", "in_progress", "incomplete"}:
 								                has_incomplete_items = True
 								            if item_type == "message":
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								                item_phase = getattr(item, "phase", None)
 								                if isinstance(item_phase, str):
 								                    normalized_phase = item_phase.strip().lower()
 								                    if normalized_phase in {"commentary", "analysis"}:
 								                        saw_commentary_phase = True
 								                    elif normalized_phase in {"final_answer", "final"}:
 								                        saw_final_answer_phase = True
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                message_text = self._extract_responses_message_text(item)
 								                if message_text:
 								                    content_parts.append(message_text)
 								            elif item_type == "reasoning":
 								                reasoning_text = self._extract_responses_reasoning_text(item)
 								                if reasoning_text:
 								                    reasoning_parts.append(reasoning_text)
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								                # Capture the full reasoning item for multi-turn continuity.
 								                # encrypted_content is an opaque blob the API needs back on
 								                # subsequent turns to maintain coherent reasoning chains.
 								                encrypted = getattr(item, "encrypted_content", None)
 								                if isinstance(encrypted, str) and encrypted:
 								                    raw_item = {"type": "reasoning", "encrypted_content": encrypted}
 								                    item_id = getattr(item, "id", None)
 								                    if isinstance(item_id, str) and item_id:
 								                        raw_item["id"] = item_id
-												feat(agent): add summary handling for reasoning items

Enhanced the AIAgent class to capture and normalize summary information for reasoning items. Implemented logic to handle summaries as lists, ensuring proper formatting for API interactions. Updated tests to validate the inclusion of summaries in reasoning items, both for existing and default cases.

											
										
										
											2026-03-01 20:03:03 -08:00
+								                    # Capture summary — required by the API when replaying reasoning items
 								                    summary = getattr(item, "summary", None)
 								                    if isinstance(summary, list):
 								                        raw_summary = []
 								                        for part in summary:
 								                            text = getattr(part, "text", None)
 								                            if isinstance(text, str):
 								                                raw_summary.append({"type": "summary_text", "text": text})
 								                        raw_item["summary"] = raw_summary
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								                    reasoning_items_raw.append(raw_item)
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								            elif item_type == "function_call":
 								                if item_status in {"queued", "in_progress", "incomplete"}:
 								                    continue
 								                fn_name = getattr(item, "name", "") or ""
 								                arguments = getattr(item, "arguments", "{}")
 								                if not isinstance(arguments, str):
 								                    arguments = str(arguments)
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								                raw_call_id = getattr(item, "call_id", None)
 								                raw_item_id = getattr(item, "id", None)
 								                embedded_call_id, _ = self._split_responses_tool_id(raw_item_id)
 								                call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
 								                if not isinstance(call_id, str) or not call_id.strip():
 								                    call_id = f"call_{uuid.uuid4().hex[:12]}"
 								                call_id = call_id.strip()
 								                response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
 								                response_item_id = self._derive_responses_function_call_id(call_id, response_item_id)
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                tool_calls.append(SimpleNamespace(
 								                    id=call_id,
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								                    call_id=call_id,
 								                    response_item_id=response_item_id,
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                    type="function",
 								                    function=SimpleNamespace(name=fn_name, arguments=arguments),
 								                ))
 								            elif item_type == "custom_tool_call":
 								                fn_name = getattr(item, "name", "") or ""
 								                arguments = getattr(item, "input", "{}")
 								                if not isinstance(arguments, str):
 								                    arguments = str(arguments)
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								                raw_call_id = getattr(item, "call_id", None)
 								                raw_item_id = getattr(item, "id", None)
 								                embedded_call_id, _ = self._split_responses_tool_id(raw_item_id)
 								                call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
 								                if not isinstance(call_id, str) or not call_id.strip():
 								                    call_id = f"call_{uuid.uuid4().hex[:12]}"
 								                call_id = call_id.strip()
 								                response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
 								                response_item_id = self._derive_responses_function_call_id(call_id, response_item_id)
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                tool_calls.append(SimpleNamespace(
 								                    id=call_id,
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								                    call_id=call_id,
 								                    response_item_id=response_item_id,
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                    type="function",
 								                    function=SimpleNamespace(name=fn_name, arguments=arguments),
 								                ))
 								        final_text = "\n".join([p for p in content_parts if p]).strip()
 								        if not final_text and hasattr(response, "output_text"):
 								            out_text = getattr(response, "output_text", "")
 								            if isinstance(out_text, str):
 								                final_text = out_text.strip()
 								        assistant_message = SimpleNamespace(
 								            content=final_text,
 								            tool_calls=tool_calls,
 								            reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None,
 								            reasoning_content=None,
 								            reasoning_details=None,
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								            codex_reasoning_items=reasoning_items_raw or None,
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								        )
 								        if tool_calls:
 								            finish_reason = "tool_calls"
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								        elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								            finish_reason = "incomplete"
 								        else:
 								            finish_reason = "stop"
 								        return assistant_message, finish_reason
 								    def _run_codex_stream(self, api_kwargs: dict):
 								        """Execute one streaming Responses API request and return the final response."""
-												Harden Codex stream handling and ack continuation

											
										
										
											2026-02-25 18:56:06 -08:00
+								        max_stream_retries = 1
 								        for attempt in range(max_stream_retries + 1):
 								            try:
 								                with self.client.responses.stream(**api_kwargs) as stream:
 								                    for _ in stream:
 								                        pass
 								                    return stream.get_final_response()
 								            except RuntimeError as exc:
 								                err_text = str(exc)
 								                missing_completed = "response.completed" in err_text
 								                if missing_completed and attempt < max_stream_retries:
 								                    logger.debug(
 								                        "Responses stream closed before completion (attempt %s/%s); retrying.",
 								                        attempt + 1,
 								                        max_stream_retries + 1,
 								                    )
 								                    continue
 								                if missing_completed:
 								                    logger.debug(
-												Fix Codex stream fallback for Responses completion gaps

											
										
										
											2026-02-25 19:08:11 -08:00
+								                        "Responses stream did not emit response.completed; falling back to create(stream=True)."
-												Harden Codex stream handling and ack continuation

											
										
										
											2026-02-25 18:56:06 -08:00
+								                    )
-												Fix Codex stream fallback for Responses completion gaps

											
										
										
											2026-02-25 19:08:11 -08:00
+								                    return self._run_codex_create_stream_fallback(api_kwargs)
-												Harden Codex stream handling and ack continuation

											
										
										
											2026-02-25 18:56:06 -08:00
+								                raise
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
-												Fix Codex stream fallback for Responses completion gaps

											
										
										
											2026-02-25 19:08:11 -08:00
+								    def _run_codex_create_stream_fallback(self, api_kwargs: dict):
 								        """Fallback path for stream completion edge cases on Codex-style Responses backends."""
 								        fallback_kwargs = dict(api_kwargs)
 								        fallback_kwargs["stream"] = True
-												Harden Codex auth refresh and responses compatibility

											
										
										
											2026-02-25 19:27:54 -08:00
+								        fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True)
-												Fix Codex stream fallback for Responses completion gaps

											
										
										
											2026-02-25 19:08:11 -08:00
+								        stream_or_response = self.client.responses.create(**fallback_kwargs)
 								        # Compatibility shim for mocks or providers that still return a concrete response.
 								        if hasattr(stream_or_response, "output"):
 								            return stream_or_response
 								        if not hasattr(stream_or_response, "__iter__"):
 								            return stream_or_response
 								        terminal_response = None
 								        try:
 								            for event in stream_or_response:
 								                event_type = getattr(event, "type", None)
 								                if not event_type and isinstance(event, dict):
 								                    event_type = event.get("type")
 								                if event_type not in {"response.completed", "response.incomplete", "response.failed"}:
 								                    continue
 								                terminal_response = getattr(event, "response", None)
 								                if terminal_response is None and isinstance(event, dict):
 								                    terminal_response = event.get("response")
 								                if terminal_response is not None:
 								                    return terminal_response
 								        finally:
 								            close_fn = getattr(stream_or_response, "close", None)
 								            if callable(close_fn):
 								                try:
 								                    close_fn()
 								                except Exception:
 								                    pass
 								        if terminal_response is not None:
 								            return terminal_response
 								        raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.")
-												Harden Codex auth refresh and responses compatibility

											
										
										
											2026-02-25 19:27:54 -08:00
+								    def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
 								        if self.api_mode != "codex_responses" or self.provider != "openai-codex":
 								            return False
 								        try:
 								            from hermes_cli.auth import resolve_codex_runtime_credentials
 								            creds = resolve_codex_runtime_credentials(force_refresh=force)
 								        except Exception as exc:
 								            logger.debug("Codex credential refresh failed: %s", exc)
 								            return False
 								        api_key = creds.get("api_key")
 								        base_url = creds.get("base_url")
 								        if not isinstance(api_key, str) or not api_key.strip():
 								            return False
 								        if not isinstance(base_url, str) or not base_url.strip():
 								            return False
 								        self.api_key = api_key.strip()
 								        self.base_url = base_url.strip().rstrip("/")
 								        self._client_kwargs["api_key"] = self.api_key
 								        self._client_kwargs["base_url"] = self.base_url
 								        try:
 								            self.client.close()
 								        except Exception:
 								            pass
 								        try:
 								            self.client = OpenAI(**self._client_kwargs)
 								        except Exception as exc:
 								            logger.warning("Failed to rebuild OpenAI client after Codex refresh: %s", exc)
 								            return False
 								        return True
-												fix: implement Nous credential refresh on 401 error for retry logic

											
										
										
											2026-03-07 13:34:23 +11:00
+								    def _try_refresh_nous_client_credentials(self, *, force: bool = True) -> bool:
 								        if self.api_mode != "chat_completions" or self.provider != "nous":
 								            return False
 								        try:
 								            from hermes_cli.auth import resolve_nous_runtime_credentials
 								            creds = resolve_nous_runtime_credentials(
 								                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
 								                timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
 								                force_mint=force,
 								            )
 								        except Exception as exc:
 								            logger.debug("Nous credential refresh failed: %s", exc)
 								            return False
 								        api_key = creds.get("api_key")
 								        base_url = creds.get("base_url")
 								        if not isinstance(api_key, str) or not api_key.strip():
 								            return False
 								        if not isinstance(base_url, str) or not base_url.strip():
 								            return False
 								        self.api_key = api_key.strip()
 								        self.base_url = base_url.strip().rstrip("/")
 								        self._client_kwargs["api_key"] = self.api_key
 								        self._client_kwargs["base_url"] = self.base_url
 								        # Nous requests should not inherit OpenRouter-only attribution headers.
 								        self._client_kwargs.pop("default_headers", None)
 								        try:
 								            self.client.close()
 								        except Exception:
 								            pass
 								        try:
 								            self.client = OpenAI(**self._client_kwargs)
 								        except Exception as exc:
 								            logger.warning("Failed to rebuild OpenAI client after Nous refresh: %s", exc)
 								            return False
 								        return True
-												feat: implement interactive prompts for sudo password and command approval in CLI

- Added methods for handling sudo password and dangerous command approval prompts using a callback mechanism in cli.py.
- Integrated these prompts with the prompt_toolkit UI for improved user experience.
- Updated terminal_tool.py to support callback registration for interactive prompts, enhancing the CLI's interactivity.
- Introduced a background thread for API calls in run_agent.py to allow for interrupt handling during long-running operations.
- Enhanced error handling for interrupted API calls, ensuring graceful degradation of user experience.

											
										
										
											2026-02-21 12:15:40 -08:00
+								    def _interruptible_api_call(self, api_kwargs: dict):
 								        """
 								        Run the API call in a background thread so the main conversation loop
 								        can detect interrupts without waiting for the full HTTP round-trip.
-												feat: enhance interrupt handling and container resource configuration

- Introduced a shared interrupt signaling mechanism to allow tools to check for user interrupts during long-running operations.
- Updated the AIAgent to handle interrupts more effectively, ensuring in-progress tool calls are canceled and multiple interrupt messages are combined into one prompt.
- Enhanced the CLI configuration to include container resource limits (CPU, memory, disk) and persistence options for Docker, Singularity, and Modal environments.
- Improved documentation to clarify interrupt behaviors and container resource settings, providing users with better guidance on configuration and usage.

											
										
										
											2026-02-23 02:11:33 -08:00
+								        On interrupt, closes the HTTP client to cancel the in-flight request
 								        (stops token generation and avoids wasting money), then rebuilds the
 								        client for future calls.
-												feat: implement interactive prompts for sudo password and command approval in CLI

- Added methods for handling sudo password and dangerous command approval prompts using a callback mechanism in cli.py.
- Integrated these prompts with the prompt_toolkit UI for improved user experience.
- Updated terminal_tool.py to support callback registration for interactive prompts, enhancing the CLI's interactivity.
- Introduced a background thread for API calls in run_agent.py to allow for interrupt handling during long-running operations.
- Enhanced error handling for interrupted API calls, ensuring graceful degradation of user experience.

											
										
										
											2026-02-21 12:15:40 -08:00
+								        """
 								        result = {"response": None, "error": None}
 								        def _call():
 								            try:
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                if self.api_mode == "codex_responses":
 								                    result["response"] = self._run_codex_stream(api_kwargs)
 								                else:
 								                    result["response"] = self.client.chat.completions.create(**api_kwargs)
-												feat: implement interactive prompts for sudo password and command approval in CLI

- Added methods for handling sudo password and dangerous command approval prompts using a callback mechanism in cli.py.
- Integrated these prompts with the prompt_toolkit UI for improved user experience.
- Updated terminal_tool.py to support callback registration for interactive prompts, enhancing the CLI's interactivity.
- Introduced a background thread for API calls in run_agent.py to allow for interrupt handling during long-running operations.
- Enhanced error handling for interrupted API calls, ensuring graceful degradation of user experience.

											
										
										
											2026-02-21 12:15:40 -08:00
+								            except Exception as e:
 								                result["error"] = e
 								        t = threading.Thread(target=_call, daemon=True)
 								        t.start()
 								        while t.is_alive():
 								            t.join(timeout=0.3)
 								            if self._interrupt_requested:
-												feat: enhance interrupt handling and container resource configuration

- Introduced a shared interrupt signaling mechanism to allow tools to check for user interrupts during long-running operations.
- Updated the AIAgent to handle interrupts more effectively, ensuring in-progress tool calls are canceled and multiple interrupt messages are combined into one prompt.
- Enhanced the CLI configuration to include container resource limits (CPU, memory, disk) and persistence options for Docker, Singularity, and Modal environments.
- Improved documentation to clarify interrupt behaviors and container resource settings, providing users with better guidance on configuration and usage.

											
										
										
											2026-02-23 02:11:33 -08:00
+								                # Force-close the HTTP connection to stop token generation
 								                try:
 								                    self.client.close()
 								                except Exception:
 								                    pass
 								                # Rebuild the client for future calls (cheap, no network)
 								                try:
 								                    self.client = OpenAI(**self._client_kwargs)
 								                except Exception:
 								                    pass
-												feat: implement interactive prompts for sudo password and command approval in CLI

- Added methods for handling sudo password and dangerous command approval prompts using a callback mechanism in cli.py.
- Integrated these prompts with the prompt_toolkit UI for improved user experience.
- Updated terminal_tool.py to support callback registration for interactive prompts, enhancing the CLI's interactivity.
- Introduced a background thread for API calls in run_agent.py to allow for interrupt handling during long-running operations.
- Enhanced error handling for interrupted API calls, ensuring graceful degradation of user experience.

											
										
										
											2026-02-21 12:15:40 -08:00
+								                raise InterruptedError("Agent interrupted during API call")
 								        if result["error"] is not None:
 								            raise result["error"]
 								        return result["response"]
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								    # ── Provider fallback ──────────────────────────────────────────────────
-												refactor: unified OAuth/API-key credential resolution for fallback

Split fallback provider handling into two clean registries:

  _FALLBACK_API_KEY_PROVIDERS — env-var-based (openrouter, zai, kimi, minimax)
  _FALLBACK_OAUTH_PROVIDERS  — OAuth-based (openai-codex, nous)

New _resolve_fallback_credentials() method handles all three cases
(OAuth, API key, custom endpoint) and returns a uniform (key, url, mode)
tuple. _try_activate_fallback() is now just validation + client build.

Adds Nous Portal as a fallback provider — uses the same OAuth flow
as the primary provider (hermes login), returns chat_completions mode.

OAuth providers get credential refresh for free: the existing 401
retry handlers (_try_refresh_codex/nous_client_credentials) check
self.provider, which is set correctly after fallback activation.

4 new tests (nous activation, nous no-login, codex retained).
27 total fallback tests passing, 2548 full suite.

											
										
										
											2026-03-08 21:44:48 -07:00
+								    # API-key providers: provider → (base_url, [env_var_names])
 								    _FALLBACK_API_KEY_PROVIDERS = {
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								        "openrouter": (OPENROUTER_BASE_URL, ["OPENROUTER_API_KEY"]),
-												fix: restrict fallback providers to actual hermes providers

Remove hallucinated providers (openai, deepseek, together, groq,
fireworks, mistral, gemini, nous) from the fallback provider map.
These don't exist in hermes-agent's provider system.

The real supported providers for fallback are:
  openrouter   (OPENROUTER_API_KEY)
  zai          (ZAI_API_KEY)
  kimi-coding  (KIMI_API_KEY)
  minimax      (MINIMAX_API_KEY)
  minimax-cn   (MINIMAX_CN_API_KEY)

For any other OpenAI-compatible endpoint, users can use the
base_url + api_key_env overrides in the config.

Also adds Kimi User-Agent header for kimi fallback (matching
the main provider system).

											
										
										
											2026-03-08 20:49:55 -07:00
+								        "zai": ("https://api.z.ai/api/paas/v4", ["ZAI_API_KEY", "Z_AI_API_KEY"]),
 								        "kimi-coding": ("https://api.moonshot.ai/v1", ["KIMI_API_KEY"]),
 								        "minimax": ("https://api.minimax.io/v1", ["MINIMAX_API_KEY"]),
 								        "minimax-cn": ("https://api.minimaxi.com/v1", ["MINIMAX_CN_API_KEY"]),
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								    }
-												refactor: unified OAuth/API-key credential resolution for fallback

Split fallback provider handling into two clean registries:

  _FALLBACK_API_KEY_PROVIDERS — env-var-based (openrouter, zai, kimi, minimax)
  _FALLBACK_OAUTH_PROVIDERS  — OAuth-based (openai-codex, nous)

New _resolve_fallback_credentials() method handles all three cases
(OAuth, API key, custom endpoint) and returns a uniform (key, url, mode)
tuple. _try_activate_fallback() is now just validation + client build.

Adds Nous Portal as a fallback provider — uses the same OAuth flow
as the primary provider (hermes login), returns chat_completions mode.

OAuth providers get credential refresh for free: the existing 401
retry handlers (_try_refresh_codex/nous_client_credentials) check
self.provider, which is set correctly after fallback activation.

4 new tests (nous activation, nous no-login, codex retained).
27 total fallback tests passing, 2548 full suite.

											
										
										
											2026-03-08 21:44:48 -07:00
+								    # OAuth providers: provider → (resolver function name in hermes_cli.auth, api_mode)
 								    # Each resolver is called with no arguments and must return a mapping
 								    # with "api_key" and "base_url"; api_mode is handed back to the caller as-is.
 								    _FALLBACK_OAUTH_PROVIDERS = {
 								        "openai-codex": ("resolve_codex_runtime_credentials", "codex_responses"),
 								        "nous": ("resolve_nous_runtime_credentials", "chat_completions"),
 								    }
 								    def _resolve_fallback_credentials(
 								        self, fb_provider: str, fb_config: dict
 								    ) -> Optional[tuple]:
 								        """Resolve credentials for a fallback provider.
 								        Returns (api_key, base_url, api_mode) on success, or None on failure.
 								        Handles three cases:
 . OAuth providers (openai-codex, nous) — call credential resolver
 . API-key providers (openrouter, zai, etc.) — read env var
 . Custom endpoints — use base_url + api_key_env from config
 								        """
 								        # ── 1. OAuth providers ────────────────────────────────────────
 								        if fb_provider in self._FALLBACK_OAUTH_PROVIDERS:
 								            resolver_name, api_mode = self._FALLBACK_OAUTH_PROVIDERS[fb_provider]
 								            try:
 								                import hermes_cli.auth as _auth
 								                resolver = getattr(_auth, resolver_name)
 								                creds = resolver()
 								                return creds["api_key"], creds["base_url"], api_mode
 								            except Exception as e:
 								                logging.warning(
 								                    "Fallback to %s failed (credential resolution): %s",
 								                    fb_provider, e,
 								                )
 								                return None
 								        # ── 2. API-key providers ──────────────────────────────────────
 								        fb_key = (fb_config.get("api_key") or "").strip()
 								        if not fb_key:
 								            key_env = (fb_config.get("api_key_env") or "").strip()
 								            if key_env:
 								                fb_key = os.getenv(key_env, "")
 								            elif fb_provider in self._FALLBACK_API_KEY_PROVIDERS:
 								                for env_var in self._FALLBACK_API_KEY_PROVIDERS[fb_provider][1]:
 								                    fb_key = os.getenv(env_var, "")
 								                    if fb_key:
 								                        break
 								        if not fb_key:
 								            logging.warning(
 								                "Fallback model configured but no API key found for provider '%s'",
 								                fb_provider,
 								            )
 								            return None
 								        # ── 3. Resolve base URL ───────────────────────────────────────
 								        fb_base_url = (fb_config.get("base_url") or "").strip()
 								        if not fb_base_url and fb_provider in self._FALLBACK_API_KEY_PROVIDERS:
 								            fb_base_url = self._FALLBACK_API_KEY_PROVIDERS[fb_provider][0]
 								        if not fb_base_url:
 								            fb_base_url = OPENROUTER_BASE_URL
 								        return fb_key, fb_base_url, "chat_completions"
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								    def _try_activate_fallback(self) -> bool:
 								        """Switch to the configured fallback model/provider.
 								        Called when the primary model is failing after retries.  Swaps the
 								        OpenAI client, model slug, and provider in-place so the retry loop
 								        can continue with the new backend.  One-shot: returns False if
 								        already activated or not configured.
 								        """
 								        if self._fallback_activated or not self._fallback_model:
 								            return False
 								        fb = self._fallback_model
 								        fb_provider = (fb.get("provider") or "").strip().lower()
 								        fb_model = (fb.get("model") or "").strip()
 								        if not fb_provider or not fb_model:
 								            return False
-												refactor: unified OAuth/API-key credential resolution for fallback

Split fallback provider handling into two clean registries:

  _FALLBACK_API_KEY_PROVIDERS — env-var-based (openrouter, zai, kimi, minimax)
  _FALLBACK_OAUTH_PROVIDERS  — OAuth-based (openai-codex, nous)

New _resolve_fallback_credentials() method handles all three cases
(OAuth, API key, custom endpoint) and returns a uniform (key, url, mode)
tuple. _try_activate_fallback() is now just validation + client build.

Adds Nous Portal as a fallback provider — uses the same OAuth flow
as the primary provider (hermes login), returns chat_completions mode.

OAuth providers get credential refresh for free: the existing 401
retry handlers (_try_refresh_codex/nous_client_credentials) check
self.provider, which is set correctly after fallback activation.

4 new tests (nous activation, nous no-login, codex retained).
27 total fallback tests passing, 2548 full suite.

											
										
										
											2026-03-08 21:44:48 -07:00
+								        resolved = self._resolve_fallback_credentials(fb_provider, fb)
 								        if resolved is None:
 								            return False
 								        fb_key, fb_base_url, fb_api_mode = resolved
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
-												refactor: unified OAuth/API-key credential resolution for fallback

Split fallback provider handling into two clean registries:

  _FALLBACK_API_KEY_PROVIDERS — env-var-based (openrouter, zai, kimi, minimax)
  _FALLBACK_OAUTH_PROVIDERS  — OAuth-based (openai-codex, nous)

New _resolve_fallback_credentials() method handles all three cases
(OAuth, API key, custom endpoint) and returns a uniform (key, url, mode)
tuple. _try_activate_fallback() is now just validation + client build.

Adds Nous Portal as a fallback provider — uses the same OAuth flow
as the primary provider (hermes login), returns chat_completions mode.

OAuth providers get credential refresh for free: the existing 401
retry handlers (_try_refresh_codex/nous_client_credentials) check
self.provider, which is set correctly after fallback activation.

4 new tests (nous activation, nous no-login, codex retained).
27 total fallback tests passing, 2548 full suite.

											
										
										
											2026-03-08 21:44:48 -07:00
+								        # Build new client
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								        try:
 								            client_kwargs = {"api_key": fb_key, "base_url": fb_base_url}
 								            if "openrouter" in fb_base_url.lower():
 								                client_kwargs["default_headers"] = {
 								                    "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
 								                    "X-OpenRouter-Title": "Hermes Agent",
 								                    "X-OpenRouter-Categories": "productivity,cli-agent",
 								                }
-												fix: restrict fallback providers to actual hermes providers

Remove hallucinated providers (openai, deepseek, together, groq,
fireworks, mistral, gemini, nous) from the fallback provider map.
These don't exist in hermes-agent's provider system.

The real supported providers for fallback are:
  openrouter   (OPENROUTER_API_KEY)
  zai          (ZAI_API_KEY)
  kimi-coding  (KIMI_API_KEY)
  minimax      (MINIMAX_API_KEY)
  minimax-cn   (MINIMAX_CN_API_KEY)

For any other OpenAI-compatible endpoint, users can use the
base_url + api_key_env overrides in the config.

Also adds Kimi User-Agent header for kimi fallback (matching
the main provider system).

											
										
										
											2026-03-08 20:49:55 -07:00
+								            elif "api.kimi.com" in fb_base_url.lower():
 								                client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
 								            self.client = OpenAI(**client_kwargs)
 								            self._client_kwargs = client_kwargs
 								            old_model = self.model
 								            self.model = fb_model
 								            self.provider = fb_provider
 								            self.base_url = fb_base_url
-												feat: add openai-codex as fallback provider

Codex OAuth uses a different auth flow (OAuth tokens, not env vars)
and a different API mode (codex_responses, not chat_completions).
The fallback now handles this specially:

- Resolves credentials via resolve_codex_runtime_credentials()
- Sets api_mode to codex_responses
- Fails gracefully if no Codex OAuth session exists

Also added to the commented-out config.yaml example.
2 new tests (codex activation + graceful failure).

											
										
										
											2026-03-08 21:34:15 -07:00
+								            self.api_mode = fb_api_mode
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								            self._fallback_activated = True
 								            # Re-evaluate prompt caching for the new provider/model
 								            self._use_prompt_caching = (
 								                "openrouter" in fb_base_url.lower()
 								                and "claude" in fb_model.lower()
 								            )
 								            print(
 								                f"{self.log_prefix}🔄 Primary model failed — switching to fallback: "
 								                f"{fb_model} via {fb_provider}"
 								            )
 								            logging.info(
 								                "Fallback activated: %s → %s (%s)",
 								                old_model, fb_model, fb_provider,
 								            )
 								            return True
 								        except Exception as e:
 								            logging.error("Failed to activate fallback model: %s", e)
 								            return False
 								    # ── End provider fallback ──────────────────────────────────────────────
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								    def _build_api_kwargs(self, api_messages: list) -> dict:
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								        """Build the keyword arguments dict for the active API mode."""
 								        if self.api_mode == "codex_responses":
 								            instructions = ""
 								            payload_messages = api_messages
 								            if api_messages and api_messages[0].get("role") == "system":
 								                instructions = str(api_messages[0].get("content") or "").strip()
 								                payload_messages = api_messages[1:]
 								            if not instructions:
 								                instructions = DEFAULT_AGENT_IDENTITY
-												feat: default reasoning effort from xhigh to medium

Reduces token usage and latency for most tasks by defaulting to
medium reasoning effort instead of xhigh. Users can still override
via config or CLI flag. Updates code, tests, example config, and docs.

											
										
										
											2026-03-07 10:14:19 -08:00
+								            # Resolve reasoning effort: config > default (medium)
 								            reasoning_effort = "medium"
-												feat(agent): enhance reasoning handling and configuration

Added support for processing encrypted reasoning content within the AIAgent class. Introduced logic to determine reasoning effort and enable/disable reasoning based on configuration settings. Updated the kwargs to reflect these changes, ensuring proper handling of reasoning parameters during agent execution.

											
										
										
											2026-03-01 16:15:20 -08:00
+								            reasoning_enabled = True
 								            if self.reasoning_config and isinstance(self.reasoning_config, dict):
 								                if self.reasoning_config.get("enabled") is False:
 								                    reasoning_enabled = False
 								                elif self.reasoning_config.get("effort"):
 								                    reasoning_effort = self.reasoning_config["effort"]
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								            kwargs = {
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                "model": self.model,
 								                "instructions": instructions,
 								                "input": self._chat_messages_to_responses_input(payload_messages),
 								                "tools": self._responses_tools(),
-												fix: add missing Responses API parameters for Codex provider

Adds tool_choice, parallel_tool_calls, and prompt_cache_key to the
Codex Responses API request kwargs — matching what the official Codex
CLI sends.

- tool_choice: 'auto' — enables the model to proactively call tools.
  Without this, the model may default to not using tools, which explains
  reports of the agent claiming it lacks shell access (#747).
- parallel_tool_calls: True — allows the model to issue multiple tool
  calls in a single turn for efficiency.
- prompt_cache_key: session_id — enables server-side prompt caching
  across turns in the same session, reducing latency and cost.

Refs #747

											
										
										
											2026-03-11 04:28:31 -07:00
+								                "tool_choice": "auto",
 								                "parallel_tool_calls": True,
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                "store": False,
-												fix: add missing Responses API parameters for Codex provider

Adds tool_choice, parallel_tool_calls, and prompt_cache_key to the
Codex Responses API request kwargs — matching what the official Codex
CLI sends.

- tool_choice: 'auto' — enables the model to proactively call tools.
  Without this, the model may default to not using tools, which explains
  reports of the agent claiming it lacks shell access (#747).
- parallel_tool_calls: True — allows the model to issue multiple tool
  calls in a single turn for efficiency.
- prompt_cache_key: session_id — enables server-side prompt caching
  across turns in the same session, reducing latency and cost.

Refs #747

											
										
										
											2026-03-11 04:28:31 -07:00
+								                "prompt_cache_key": self.session_id,
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								            }
-												feat(agent): enhance reasoning handling and configuration

Added support for processing encrypted reasoning content within the AIAgent class. Introduced logic to determine reasoning effort and enable/disable reasoning based on configuration settings. Updated the kwargs to reflect these changes, ensuring proper handling of reasoning parameters during agent execution.

											
										
										
											2026-03-01 16:15:20 -08:00
+								            if reasoning_enabled:
 								                kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
 								                kwargs["include"] = ["reasoning.encrypted_content"]
 								            else:
 								                kwargs["include"] = []
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
 								            if self.max_tokens is not None:
 								                kwargs["max_output_tokens"] = self.max_tokens
 								            return kwargs
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								        provider_preferences = {}
 								        if self.providers_allowed:
 								            provider_preferences["only"] = self.providers_allowed
 								        if self.providers_ignored:
 								            provider_preferences["ignore"] = self.providers_ignored
 								        if self.providers_order:
 								            provider_preferences["order"] = self.providers_order
 								        if self.provider_sort:
 								            provider_preferences["sort"] = self.provider_sort
-												feat(provider-routing): add OpenRouter provider routing configuration

Introduced a new `provider_routing` section in the CLI configuration to control how requests are routed across providers when using OpenRouter. This includes options for sorting providers by throughput, latency, or price, as well as allowing or ignoring specific providers, setting the order of provider attempts, and managing data collection policies. Updated relevant classes and documentation to support these features, enhancing flexibility in provider selection.

											
										
										
											2026-03-01 18:24:27 -08:00
+								        if self.provider_require_parameters:
 								            provider_preferences["require_parameters"] = True
 								        if self.provider_data_collection:
 								            provider_preferences["data_collection"] = self.provider_data_collection
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
 								        api_kwargs = {
 								            "model": self.model,
 								            "messages": api_messages,
 								            "tools": self.tools if self.tools else None,
-												fix(cli): increase max iterations for child agents and extend API call timeout for improved reliability

											
										
										
											2026-02-27 17:35:26 -08:00
+								            "timeout": 900.0,
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								        }
 								        if self.max_tokens is not None:
-												feat(api): implement dynamic max tokens handling for various providers

- Added _max_tokens_param method in AIAgent to return appropriate max tokens parameter based on the provider (OpenAI vs. others).
- Updated API calls in AIAgent to utilize the new max tokens handling.
- Introduced auxiliary_max_tokens_param function in auxiliary_client for consistent max tokens management across auxiliary clients.
- Refactored multiple tools to use auxiliary_max_tokens_param for improved compatibility with different models and providers.

											
										
										
											2026-02-26 20:23:56 -08:00
+								            api_kwargs.update(self._max_tokens_param(self.max_tokens))
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
 								        extra_body = {}
 								        if provider_preferences:
 								            extra_body["provider"] = provider_preferences
-												refactor: streamline reasoning configuration checks in AIAgent

- Simplified the logic for determining support for reasoning based on the base URL by introducing clearer variable names.
- Added product attribution for the Nous Portal to the extra body of requests when applicable, enhancing tagging for better tracking.

											
										
										
											2026-02-25 16:49:41 -08:00
+								        _is_openrouter = "openrouter" in self.base_url.lower()
 								        _is_nous = "nousresearch" in self.base_url.lower()
-												fix(agent): strip finish_reason from assistant messages to fix Mistral 422 errors (#253)

* fix(agent): skip reasoning param for Mistral API to prevent 422 errors

* fix(agent): strip finish_reason from assistant messages to fix Mistral 422 errors
											
										
										
											2026-03-02 11:35:03 +03:00
+								        _is_mistral = "api.mistral.ai" in self.base_url.lower()
 								        if (_is_openrouter or _is_nous) and not _is_mistral:
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								            if self.reasoning_config is not None:
 								                extra_body["reasoning"] = self.reasoning_config
 								            else:
 								                extra_body["reasoning"] = {
 								                    "enabled": True,
-												feat: default reasoning effort from xhigh to medium

Reduces token usage and latency for most tasks by defaulting to
medium reasoning effort instead of xhigh. Users can still override
via config or CLI flag. Updates code, tests, example config, and docs.

											
										
										
											2026-03-07 10:14:19 -08:00
+								                    "effort": "medium"
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                }
-												refactor: streamline reasoning configuration checks in AIAgent

- Simplified the logic for determining support for reasoning based on the base URL by introducing clearer variable names.
- Added product attribution for the Nous Portal to the extra body of requests when applicable, enhancing tagging for better tracking.

											
										
										
											2026-02-25 16:49:41 -08:00
+								        # Nous Portal product attribution
 								        if _is_nous:
 								            extra_body["tags"] = ["product=hermes-agent"]
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								        if extra_body:
 								            api_kwargs["extra_body"] = extra_body
 								        return api_kwargs
 								    def _build_assistant_message(self, assistant_message, finish_reason: str) -> dict:
 								        """Build a normalized assistant message dict from an API response message.
 								        Handles reasoning extraction, reasoning_details, and optional tool_calls
 								        so both the tool-call path and the final-response path share one builder.
 								        """
 								        reasoning_text = self._extract_reasoning(assistant_message)
 								        if reasoning_text and self.verbose_logging:
 								            preview = reasoning_text[:100] + "..." if len(reasoning_text) > 100 else reasoning_text
 								            logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {preview}")
-												feat(cli): add /reasoning command for effort level and display toggle

Combined implementation of reasoning management:
- /reasoning              Show current effort level and display state
- /reasoning <level>      Set reasoning effort (none, low, medium, high, xhigh)
- /reasoning show|on      Show model thinking/reasoning in output
- /reasoning hide|off     Hide model thinking/reasoning from output

Effort level changes persist to config and force agent re-init.
Display toggle updates the agent callback dynamically without re-init.

When display is enabled:
- Intermediate reasoning shown as dim [thinking] lines during tool loops
- Final reasoning shown in a bordered box above the response
- Long reasoning collapsed (5 lines intermediate, 10 lines final)

Also adds:
- reasoning_callback parameter to AIAgent
- last_reasoning in run_conversation result dict
- show_reasoning config option (display section, default: false)
- Display section in /config output
- 34 tests covering both features

Combines functionality from PR #789 and PR #790.

Co-authored-by: Aum Desai <Aum08Desai@users.noreply.github.com>
Co-authored-by: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>

											
										
										
											2026-03-11 05:53:21 -07:00
+								        if reasoning_text and self.reasoning_callback:
 								            try:
 								                self.reasoning_callback(reasoning_text)
 								            except Exception:
 								                pass
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								        msg = {
 								            "role": "assistant",
 								            "content": assistant_message.content or "",
 								            "reasoning": reasoning_text,
 								            "finish_reason": finish_reason,
 								        }
 								        if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								            # Pass reasoning_details back unmodified so providers (OpenRouter,
 								            # Anthropic, OpenAI) can maintain reasoning continuity across turns.
 								            # Each provider may include opaque fields (signature, encrypted_content)
 								            # that must be preserved exactly.
 								            raw_details = assistant_message.reasoning_details
 								            preserved = []
 								            for d in raw_details:
 								                if isinstance(d, dict):
 								                    preserved.append(d)
 								                elif hasattr(d, "__dict__"):
 								                    preserved.append(d.__dict__)
 								                elif hasattr(d, "model_dump"):
 								                    preserved.append(d.model_dump())
 								            if preserved:
 								                msg["reasoning_details"] = preserved
 								        # Codex Responses API: preserve encrypted reasoning items for
 								        # multi-turn continuity. These get replayed as input on the next turn.
 								        codex_items = getattr(assistant_message, "codex_reasoning_items", None)
 								        if codex_items:
 								            msg["codex_reasoning_items"] = codex_items
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
 								        if assistant_message.tool_calls:
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								            tool_calls = []
 								            for tool_call in assistant_message.tool_calls:
 								                raw_id = getattr(tool_call, "id", None)
 								                call_id = getattr(tool_call, "call_id", None)
 								                if not isinstance(call_id, str) or not call_id.strip():
 								                    embedded_call_id, _ = self._split_responses_tool_id(raw_id)
 								                    call_id = embedded_call_id
 								                if not isinstance(call_id, str) or not call_id.strip():
 								                    if isinstance(raw_id, str) and raw_id.strip():
 								                        call_id = raw_id.strip()
 								                    else:
 								                        call_id = f"call_{uuid.uuid4().hex[:12]}"
 								                call_id = call_id.strip()
 								                response_item_id = getattr(tool_call, "response_item_id", None)
 								                if not isinstance(response_item_id, str) or not response_item_id.strip():
 								                    _, embedded_response_item_id = self._split_responses_tool_id(raw_id)
 								                    response_item_id = embedded_response_item_id
 								                response_item_id = self._derive_responses_function_call_id(
 								                    call_id,
 								                    response_item_id if isinstance(response_item_id, str) else None,
 								                )
-												fix: preserve Gemini thought_signature in tool call messages

Gemini 3 thinking models attach extra_content with thought_signature
to function call responses. This must be echoed back on subsequent
API calls or the server rejects with a 400 error. The assistant
message builder was dropping this field, causing all Gemini 3 Flash/Pro
tool-calling flows to fail after the first function call.

											
										
										
											2026-02-28 18:01:13 +03:00
+								                tc_dict = {
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								                    "id": call_id,
 								                    "call_id": call_id,
 								                    "response_item_id": response_item_id,
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                    "type": tool_call.type,
 								                    "function": {
 								                        "name": tool_call.function.name,
 								                        "arguments": tool_call.function.arguments
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								                    },
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                }
-												fix: preserve Gemini thought_signature in tool call messages

Gemini 3 thinking models attach extra_content with thought_signature
to function call responses. This must be echoed back on subsequent
API calls or the server rejects with a 400 error. The assistant
message builder was dropping this field, causing all Gemini 3 Flash/Pro
tool-calling flows to fail after the first function call.

											
										
										
											2026-02-28 18:01:13 +03:00
+								                # Preserve extra_content (e.g. Gemini thought_signature) so it
 								                # is sent back on subsequent API calls.  Without this, Gemini 3
 								                # thinking models reject the request with a 400 error.
 								                extra = getattr(tool_call, "extra_content", None)
 								                if extra is not None:
 								                    if hasattr(extra, "model_dump"):
 								                        extra = extra.model_dump()
 								                    tc_dict["extra_content"] = extra
-												Merge remote-tracking branch 'origin/main' into feature/homeassistant-integration

# Conflicts:
#	run_agent.py

											
										
										
											2026-03-01 11:59:12 +03:00
+								                tool_calls.append(tc_dict)
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								            msg["tool_calls"] = tool_calls
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
 								        return msg
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
+								    def flush_memories(self, messages: list = None, min_turns: int = None):
 								        """Give the model one turn to persist memories before context is lost.
 								        Called before compression, session reset, or CLI exit. Injects a flush
 								        message, makes one API call, executes any memory tool calls, then
 								        strips all flush artifacts from the message list.
 								        Args:
 								            messages: The current conversation messages. If None, uses
 								                      self._session_messages (last run_conversation state).
 								            min_turns: Minimum user turns required to trigger the flush.
 								                       None = use config value (flush_min_turns).
 = always flush (used for compression).
 								        """
 								        if self._memory_flush_min_turns == 0 and min_turns is None:
 								            return
 								        if "memory" not in self.valid_tool_names or not self._memory_store:
 								            return
 								        effective_min = min_turns if min_turns is not None else self._memory_flush_min_turns
 								        if self._user_turn_count < effective_min:
 								            return
 								        if messages is None:
 								            messages = getattr(self, '_session_messages', None)
 								        if not messages or len(messages) < 3:
 								            return
 								        flush_content = (
 								            "[System: The session is being compressed. "
 								            "Please save anything worth remembering to your memories.]"
 								        )
-												Cherry-pick 6 bug fixes from PR #76 and update documentation

Code fixes (run_agent.py):
- Fix off-by-one in _flush_messages_to_session_db skipping one message per flush
- Add clear_interrupt() to 3 early-return paths preventing stale interrupt state
- Wrap handle_function_call in try/except so tool crashes don't kill the conversation
- Replace fragile `is` identity check with _flush_sentinel marker for memory flush cleanup
- Fix retry loop off-by-one (6 attempts not 7)
- Remove redundant inline `import re`

											
										
										
											2026-02-27 03:21:49 -08:00
+								        _sentinel = f"__flush_{id(self)}_{time.monotonic()}"
 								        flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel}
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
+								        messages.append(flush_msg)
 								        try:
 								            # Build API messages for the flush call
 								            api_messages = []
 								            for msg in messages:
 								                api_msg = msg.copy()
 								                if msg.get("role") == "assistant":
 								                    reasoning = msg.get("reasoning")
 								                    if reasoning:
 								                        api_msg["reasoning_content"] = reasoning
 								                api_msg.pop("reasoning", None)
-												fix(agent): strip finish_reason from assistant messages to fix Mistral 422 errors (#253)

* fix(agent): skip reasoning param for Mistral API to prevent 422 errors

* fix(agent): strip finish_reason from assistant messages to fix Mistral 422 errors
											
										
										
											2026-03-02 11:35:03 +03:00
+								                api_msg.pop("finish_reason", None)
-												fix(agent): strip _flush_sentinel from API messages

											
										
										
											2026-03-01 02:51:31 +03:00
+								                api_msg.pop("_flush_sentinel", None)
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
+								                api_messages.append(api_msg)
 								            if self._cached_system_prompt:
 								                api_messages = [{"role": "system", "content": self._cached_system_prompt}] + api_messages
 								            # Make one API call with only the memory tool available
 								            memory_tool_def = None
 								            for t in (self.tools or []):
 								                if t.get("function", {}).get("name") == "memory":
 								                    memory_tool_def = t
 								                    break
 								            if not memory_tool_def:
 								                messages.pop()  # remove flush msg
 								                return
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								            # Use auxiliary client for the flush call when available --
 								            # it's cheaper and avoids Codex Responses API incompatibility.
 								            from agent.auxiliary_client import get_text_auxiliary_client
 								            aux_client, aux_model = get_text_auxiliary_client()
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								            if aux_client:
 								                api_kwargs = {
 								                    "model": aux_model,
 								                    "messages": api_messages,
 								                    "tools": [memory_tool_def],
 								                    "temperature": 0.3,
 								                    "max_tokens": 5120,
 								                }
 								                response = aux_client.chat.completions.create(**api_kwargs, timeout=30.0)
 								            elif self.api_mode == "codex_responses":
 								                # No auxiliary client -- use the Codex Responses path directly
 								                codex_kwargs = self._build_api_kwargs(api_messages)
 								                codex_kwargs["tools"] = self._responses_tools([memory_tool_def])
 								                codex_kwargs["temperature"] = 0.3
 								                if "max_output_tokens" in codex_kwargs:
 								                    codex_kwargs["max_output_tokens"] = 5120
 								                response = self._run_codex_stream(codex_kwargs)
 								            else:
 								                api_kwargs = {
 								                    "model": self.model,
 								                    "messages": api_messages,
 								                    "tools": [memory_tool_def],
 								                    "temperature": 0.3,
 								                    **self._max_tokens_param(5120),
 								                }
 								                response = self.client.chat.completions.create(**api_kwargs, timeout=30.0)
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								            # Extract tool calls from the response, handling both API formats
 								            tool_calls = []
 								            if self.api_mode == "codex_responses" and not aux_client:
 								                assistant_msg, _ = self._normalize_codex_response(response)
 								                if assistant_msg and assistant_msg.tool_calls:
 								                    tool_calls = assistant_msg.tool_calls
 								            elif hasattr(response, "choices") and response.choices:
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
+								                assistant_message = response.choices[0].message
 								                if assistant_message.tool_calls:
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								                    tool_calls = assistant_message.tool_calls
 								            for tc in tool_calls:
 								                if tc.function.name == "memory":
 								                    try:
 								                        args = json.loads(tc.function.arguments)
 								                        flush_target = args.get("target", "memory")
 								                        from tools.memory_tool import memory_tool as _memory_tool
 								                        result = _memory_tool(
 								                            action=args.get("action"),
 								                            target=flush_target,
 								                            content=args.get("content"),
 								                            old_text=args.get("old_text"),
 								                            store=self._memory_store,
 								                        )
 								                        if self._honcho and flush_target == "user" and args.get("action") == "add":
 								                            self._honcho_save_user_observation(args.get("content", ""))
 								                        if not self.quiet_mode:
 								                            print(f"  🧠 Memory flush: saved to {args.get('target', 'memory')}")
 								                    except Exception as e:
 								                        logger.debug("Memory flush tool call failed: %s", e)
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
+								        except Exception as e:
 								            logger.debug("Memory flush API call failed: %s", e)
 								        finally:
-												Cherry-pick 6 bug fixes from PR #76 and update documentation

Code fixes (run_agent.py):
- Fix off-by-one in _flush_messages_to_session_db skipping one message per flush
- Add clear_interrupt() to 3 early-return paths preventing stale interrupt state
- Wrap handle_function_call in try/except so tool crashes don't kill the conversation
- Replace fragile `is` identity check with _flush_sentinel marker for memory flush cleanup
- Fix retry loop off-by-one (6 attempts not 7)
- Remove redundant inline `import re`

											
										
										
											2026-02-27 03:21:49 -08:00
+								            # Strip flush artifacts: remove everything from the flush message onward.
 								            # Use sentinel marker instead of identity check for robustness.
 								            while messages and messages[-1].get("_flush_sentinel") != _sentinel:
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
+								                messages.pop()
-												Cherry-pick 6 bug fixes from PR #76 and update documentation

Code fixes (run_agent.py):
- Fix off-by-one in _flush_messages_to_session_db skipping one message per flush
- Add clear_interrupt() to 3 early-return paths preventing stale interrupt state
- Wrap handle_function_call in try/except so tool crashes don't kill the conversation
- Replace fragile `is` identity check with _flush_sentinel marker for memory flush cleanup
- Fix retry loop off-by-one (6 attempts not 7)
- Remove redundant inline `import re`

											
										
										
											2026-02-27 03:21:49 -08:00
+								                if not messages:
 								                    break
 								            if messages and messages[-1].get("_flush_sentinel") == _sentinel:
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
+								                messages.pop()
-												fix: detect and warn on file re-read loops after context compression

When context compression summarizes conversation history, the agent
loses track of which files it already read and re-reads them in a loop.
Users report the agent reading the same files endlessly without writing.

Root cause: context compression is lossy — file contents and read history
are lost in the summary. After compression, the model thinks it hasn't
examined the files yet and reads them again.

Fix (two-part):
1. Track file reads per task in file_tools.py. When the same file region
   is read again, include a _warning in the response telling the model
   to stop re-reading and use existing information.
2. After context compression, inject a structured message listing all
   files already read in the session with explicit "do NOT re-read"
   instruction, preserving read history across compression boundaries.

Adds 16 tests covering warning detection, task isolation, summary
accuracy, tracker cleanup, and compression history injection.

											
										
										
											2026-03-08 20:44:42 +03:00
+								    def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default") -> tuple:
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								        """Compress conversation context and split the session in SQLite.
 								        Returns:
 								            (compressed_messages, new_system_prompt) tuple
 								        """
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
+								        # Pre-compression memory flush: let the model save memories before they're lost
 								        self.flush_memories(messages, min_turns=0)
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								        compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)
 								        todo_snapshot = self._todo_store.format_for_injection()
 								        if todo_snapshot:
 								            compressed.append({"role": "user", "content": todo_snapshot})
-												fix: detect and warn on file re-read loops after context compression

When context compression summarizes conversation history, the agent
loses track of which files it already read and re-reads them in a loop.
Users report the agent reading the same files endlessly without writing.

Root cause: context compression is lossy — file contents and read history
are lost in the summary. After compression, the model thinks it hasn't
examined the files yet and reads them again.

Fix (two-part):
1. Track file reads per task in file_tools.py. When the same file region
   is read again, include a _warning in the response telling the model
   to stop re-reading and use existing information.
2. After context compression, inject a structured message listing all
   files already read in the session with explicit "do NOT re-read"
   instruction, preserving read history across compression boundaries.

Adds 16 tests covering warning detection, task isolation, summary
accuracy, tracker cleanup, and compression history injection.

											
										
										
											2026-03-08 20:44:42 +03:00
+								        # Preserve file-read history so the model doesn't re-read files
 								        # it already examined before compression.
 								        try:
 								            from tools.file_tools import get_read_files_summary
 								            read_files = get_read_files_summary(task_id)
 								            if read_files:
 								                file_list = "\n".join(
 								                    f"  - {f['path']} ({', '.join(f['regions'])})"
 								                    for f in read_files
 								                )
 								                compressed.append({"role": "user", "content": (
 								                    "[Files already read in this session — do NOT re-read these]\n"
 								                    f"{file_list}\n"
 								                    "Use the information from the context summary above. "
 								                    "Proceed with writing, editing, or responding."
 								                )})
 								        except Exception:
 								            pass  # Don't break compression if file tracking fails
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								        self._invalidate_system_prompt()
 								        new_system_prompt = self._build_system_prompt(system_message)
 								        self._cached_system_prompt = new_system_prompt
 								        if self._session_db:
 								            try:
-												feat: session naming with unique titles, auto-lineage, rich listing, resume by name

- Schema v4: unique title index, migration from v2/v3
- set/get/resolve session titles with uniqueness enforcement
- Auto-lineage: context compression auto-numbers titles (Task -> Task #2 -> Task #3)
- resolve_session_by_title: auto-latest finds most recent continuation
- list_sessions_rich: preview (first 60 chars) + last_active timestamp
- CLI: -c accepts optional name arg (hermes -c 'my project')
- CLI: /title command with deferred mode (set before session exists)
- CLI: sessions list shows Title, Preview, Last Active, ID
- 27 new tests (1844 total passing)

											
										
										
											2026-03-08 15:20:29 -07:00
+								                # Propagate title to the new session with auto-numbering
 								                old_title = self._session_db.get_session_title(self.session_id)
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                self._session_db.end_session(self.session_id, "compression")
 								                old_session_id = self.session_id
 								                self.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
 								                self._session_db.create_session(
 								                    session_id=self.session_id,
 								                    source=self.platform or "cli",
 								                    model=self.model,
 								                    parent_session_id=old_session_id,
 								                )
-												feat: session naming with unique titles, auto-lineage, rich listing, resume by name

- Schema v4: unique title index, migration from v2/v3
- set/get/resolve session titles with uniqueness enforcement
- Auto-lineage: context compression auto-numbers titles (Task -> Task #2 -> Task #3)
- resolve_session_by_title: auto-latest finds most recent continuation
- list_sessions_rich: preview (first 60 chars) + last_active timestamp
- CLI: -c accepts optional name arg (hermes -c 'my project')
- CLI: /title command with deferred mode (set before session exists)
- CLI: sessions list shows Title, Preview, Last Active, ID
- 27 new tests (1844 total passing)

											
										
										
											2026-03-08 15:20:29 -07:00
+								                # Auto-number the title for the continuation session
 								                if old_title:
 								                    try:
 								                        new_title = self._session_db.get_next_title_in_lineage(old_title)
 								                        self._session_db.set_session_title(self.session_id, new_title)
 								                    except (ValueError, Exception) as e:
 								                        logger.debug("Could not propagate title on compression: %s", e)
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                self._session_db.update_system_prompt(self.session_id, new_system_prompt)
-												fix: eliminate 3x SQLite message duplication in gateway sessions (#860)

Three separate code paths all wrote to the same SQLite state.db with
no deduplication, inflating session transcripts by 3-4x:

1. _log_msg_to_db() — wrote each message individually after append
2. _flush_messages_to_session_db() — re-wrote ALL new messages at
   every _persist_session() call (~18 exit points), with no tracking
   of what was already written
3. gateway append_to_transcript() — wrote everything a third time
   after the agent returned

Since load_transcript() prefers SQLite over JSONL, the inflated data
was loaded on every session resume, causing proportional token waste.

Fix:
- Remove _log_msg_to_db() and all 16 call sites (redundant with flush)
- Add _last_flushed_db_idx tracking in _flush_messages_to_session_db()
  so repeated _persist_session() calls only write truly new messages
- Reset flush cursor on compression (new session ID)
- Add skip_db parameter to SessionStore.append_to_transcript() so the
  gateway skips SQLite writes when the agent already persisted them
- Gateway now passes skip_db=True for agent-managed messages, still
  writes to JSONL as backup

Verified: a 12-message CLI session with tool calls produces exactly
12 SQLite rows with zero duplicates (previously would be 36-48).

Tests: 9 new tests covering flush deduplication, skip_db behavior,
compression reset, and initialization. Full suite passes (2869 tests).

											
										
										
											2026-03-10 15:22:44 -07:00
+								                # Reset flush cursor — new session starts with no messages written
 								                self._last_flushed_db_idx = 0
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								            except Exception as e:
 								                logger.debug("Session DB compression split failed: %s", e)
 								        return compressed, new_system_prompt
-												feat: iteration budget pressure via tool result injection

Two-tier warning system that nudges the LLM as it approaches
max_iterations, injected into the last tool result JSON rather
than as a separate system message:

- Caution (70%): {"_budget_warning": "[BUDGET: 42/60...]"}
- Warning (90%): {"_budget_warning": "[BUDGET WARNING: 54/60...]"}

For JSON tool results, adds a _budget_warning field to the existing
dict. For plain text results, appends the warning as text.

Key properties:
- No system messages injected mid-conversation
- No changes to message structure
- Prompt cache stays valid
- Configurable thresholds (0.7 / 0.9)
- Can be disabled: _budget_pressure_enabled = False

Inspired by PR #421 (@Bartok9) and issue #414.
8 tests covering thresholds, edge cases, JSON and text injection.

											
										
										
											2026-03-09 05:19:42 -07:00
+								    def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								        """Execute tool calls from the assistant message and append results to messages."""
 								        for i, tool_call in enumerate(assistant_message.tool_calls, 1):
-												feat: enhance interrupt handling and container resource configuration

- Introduced a shared interrupt signaling mechanism to allow tools to check for user interrupts during long-running operations.
- Updated the AIAgent to handle interrupts more effectively, ensuring in-progress tool calls are canceled and multiple interrupt messages are combined into one prompt.
- Enhanced the CLI configuration to include container resource limits (CPU, memory, disk) and persistence options for Docker, Singularity, and Modal environments.
- Improved documentation to clarify interrupt behaviors and container resource settings, providing users with better guidance on configuration and usage.

											
										
										
											2026-02-23 02:11:33 -08:00
+								            # SAFETY: check interrupt BEFORE starting each tool.
 								            # If the user sent "stop" during a previous tool's execution,
 								            # do NOT start any more tools -- skip them all immediately.
 								            if self._interrupt_requested:
 								                remaining_calls = assistant_message.tool_calls[i-1:]
 								                if remaining_calls:
 								                    print(f"{self.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)")
 								                for skipped_tc in remaining_calls:
-												fix: add context to interruption messages for model awareness

When the agent is interrupted, the model now receives descriptive
context instead of a generic 'Operation interrupted.' string:

- Tool skip messages include the tool name:
  '[Tool execution cancelled — terminal was skipped due to user interrupt]'
  '[Tool execution skipped — web_search was not started. User sent a new message]'

- API call interrupts include timing:
  'Operation interrupted: waiting for model response (4.2s elapsed).'

- Retry/error interrupts include retry context:
  'Operation interrupted: retrying API call after rate limit (retry 2/5).'
  'Operation interrupted: handling API error (Timeout: connection timed out).'

This helps the model understand what was happening when it was
interrupted, reducing wasted iterations spent re-discovering state.

											
										
										
											2026-03-08 18:58:23 -07:00
+								                    skipped_name = skipped_tc.function.name
-												feat: enhance interrupt handling and container resource configuration

- Introduced a shared interrupt signaling mechanism to allow tools to check for user interrupts during long-running operations.
- Updated the AIAgent to handle interrupts more effectively, ensuring in-progress tool calls are canceled and multiple interrupt messages are combined into one prompt.
- Enhanced the CLI configuration to include container resource limits (CPU, memory, disk) and persistence options for Docker, Singularity, and Modal environments.
- Improved documentation to clarify interrupt behaviors and container resource settings, providing users with better guidance on configuration and usage.

											
										
										
											2026-02-23 02:11:33 -08:00
+								                    skip_msg = {
 								                        "role": "tool",
-												fix: add context to interruption messages for model awareness

When the agent is interrupted, the model now receives descriptive
context instead of a generic 'Operation interrupted.' string:

- Tool skip messages include the tool name:
  '[Tool execution cancelled — terminal was skipped due to user interrupt]'
  '[Tool execution skipped — web_search was not started. User sent a new message]'

- API call interrupts include timing:
  'Operation interrupted: waiting for model response (4.2s elapsed).'

- Retry/error interrupts include retry context:
  'Operation interrupted: retrying API call after rate limit (retry 2/5).'
  'Operation interrupted: handling API error (Timeout: connection timed out).'

This helps the model understand what was happening when it was
interrupted, reducing wasted iterations spent re-discovering state.

											
										
										
											2026-03-08 18:58:23 -07:00
+								                        "content": f"[Tool execution cancelled — {skipped_name} was skipped due to user interrupt]",
-												feat: enhance interrupt handling and container resource configuration

- Introduced a shared interrupt signaling mechanism to allow tools to check for user interrupts during long-running operations.
- Updated the AIAgent to handle interrupts more effectively, ensuring in-progress tool calls are canceled and multiple interrupt messages are combined into one prompt.
- Enhanced the CLI configuration to include container resource limits (CPU, memory, disk) and persistence options for Docker, Singularity, and Modal environments.
- Improved documentation to clarify interrupt behaviors and container resource settings, providing users with better guidance on configuration and usage.

											
										
										
											2026-02-23 02:11:33 -08:00
+								                        "tool_call_id": skipped_tc.id,
 								                    }
 								                    messages.append(skip_msg)
 								                break
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								            function_name = tool_call.function.name
-												feat: enhance AIAgent's tool usage nudges and content handling

- Introduced a method to strip <think> blocks from content, improving text visibility.
- Implemented counters to reset nudge intervals when memory and skill tools are used, enhancing user guidance.
- Captured content from turns with tool calls to provide fallback responses, ensuring continuity in conversation.
- Updated nudge logic to remind users about saving memories and creating skills based on interaction patterns.

											
										
										
											2026-02-22 21:33:28 -08:00
+								            # Reset nudge counters when the relevant tool is actually used
 								            if function_name == "memory":
 								                self._turns_since_memory = 0
 								            elif function_name == "skill_manage":
 								                self._iters_since_skill = 0
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								            try:
 								                function_args = json.loads(tool_call.function.arguments)
 								            except json.JSONDecodeError as e:
 								                logging.warning(f"Unexpected JSON error after validation: {e}")
 								                function_args = {}
-												fix: add upstream guard for non-dict function_args + tests for build_tool_preview

Complements PR #453 by 0xbyt4. Adds isinstance(dict) guard in
run_agent.py to catch cases where json.loads returns non-dict
(e.g. null, list, string) before they reach downstream code.

Also adds 15 tests for build_tool_preview covering None args,
empty dicts, known/unknown tools, fallback keys, truncation,
and all special-cased tools (process, todo, memory, session_search).

											
										
										
											2026-03-09 21:01:40 -07:00
+								            if not isinstance(function_args, dict):
 								                function_args = {}
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
 								            if not self.quiet_mode:
 								                args_str = json.dumps(function_args, ensure_ascii=False)
 								                args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
 								                print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
 								            if self.tool_progress_callback:
 								                try:
 								                    preview = _build_tool_preview(function_name, function_args)
-												feat: show detailed tool call args in gateway based on config

Issue #263: Telegram/Discord/WhatsApp/Slack now show tool call details
based on display.tool_progress in config.yaml.

Changes:
- gateway/run.py: 'verbose' mode shows full args (keys + JSON, 200 char
  max). 'all' mode preview increased from 40 to 80 chars. Added missing
  tool emojis (execute_code, delegate_task, clarify, skill_manage,
  search_files).
- agent/display.py: Added execute_code, delegate_task, clarify,
  skill_manage to primary_args. Added 'code' and 'goal' to fallback keys.
- run_agent.py: Pass function_args dict to tool_progress_callback so
  gateway can format based on its own verbosity config.

Config usage:
  display:
    tool_progress: verbose  # off | new | all | verbose

											
										
										
											2026-03-02 05:23:15 -08:00
+								                    self.tool_progress_callback(function_name, preview, function_args)
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                except Exception as cb_err:
 								                    logging.debug(f"Tool progress callback error: {cb_err}")
-												feat: filesystem checkpoints and /rollback command

Automatic filesystem snapshots before destructive file operations,
with user-facing rollback.  Inspired by PR #559 (by @alireza78a).

Architecture:
- Shadow git repos at ~/.hermes/checkpoints/{hash}/ via GIT_DIR
- CheckpointManager: take/list/restore, turn-scoped dedup, pruning
- Transparent — the LLM never sees it, no tool schema, no tokens
- Once per turn — only first write_file/patch triggers a snapshot

Integration:
- Config: checkpoints.enabled + checkpoints.max_snapshots
- CLI flag: hermes --checkpoints
- Trigger: run_agent.py _execute_tool_calls() before write_file/patch
- /rollback slash command in CLI + gateway (list, restore by number)
- Pre-rollback snapshot auto-created on restore (undo the undo)

Safety:
- Never blocks file operations — all errors silently logged
- Skips root dir, home dir, dirs >50K files
- Disables gracefully when git not installed
- Shadow repo completely isolated from project git

Tests: 35 new tests, all passing (2798 total suite)
Docs: feature page, config reference, CLI commands reference

											
										
										
											2026-03-10 00:49:15 -07:00
+								            # Checkpoint: snapshot working dir before file-mutating tools
 								            if function_name in ("write_file", "patch") and self._checkpoint_mgr.enabled:
 								                try:
 								                    file_path = function_args.get("path", "")
 								                    if file_path:
 								                        work_dir = self._checkpoint_mgr.get_working_dir_for_path(file_path)
 								                        self._checkpoint_mgr.ensure_checkpoint(
 								                            work_dir, f"before {function_name}"
 								                        )
 								                except Exception:
 								                    pass  # never block tool execution
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								            tool_start_time = time.time()
 								            if function_name == "todo":
 								                from tools.todo_tool import todo_tool as _todo_tool
 								                function_result = _todo_tool(
 								                    todos=function_args.get("todos"),
 								                    merge=function_args.get("merge", False),
 								                    store=self._todo_store,
 								                )
 								                tool_duration = time.time() - tool_start_time
 								                if self.quiet_mode:
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								                    print(f"  {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
-												fix(gateway): Pass session_db to AIAgent, fixing session_search error

When running via the gateway (e.g. Telegram), the session_search tool
returned: {"error": "session_search must be handled by the agent loop"}

Root cause:
- gateway/run.py creates AIAgent without passing session_db=
- self._session_db is None in the agent instance
- The dispatch condition "elif function_name == 'session_search' and self._session_db"
  skips when _session_db is None, falling through to the generic error

This fix:
1. Initializes self._session_db in GatewayRunner.__init__()
2. Passes session_db to all AIAgent instantiations in gateway/run.py
3. Adds defensive fallback in run_agent.py to return a clear error when
   session_db is unavailable, instead of falling through

Fixes #105

											
										
										
											2026-02-27 00:32:17 -05:00
+								            elif function_name == "session_search":
 								                if not self._session_db:
 								                    function_result = json.dumps({"success": False, "error": "Session database not available."})
 								                else:
 								                    from tools.session_search_tool import session_search as _session_search
 								                    function_result = _session_search(
 								                        query=function_args.get("query", ""),
 								                        role_filter=function_args.get("role_filter"),
 								                        limit=function_args.get("limit", 3),
 								                        db=self._session_db,
-												fix: exclude current session from session_search results

session_search was returning the current session if it matched the
query, which is redundant — the agent already has the current
conversation context. This wasted an LLM summarization call and a
result slot.

Added current_session_id parameter to session_search(). The agent
passes self.session_id and the search filters out any results where
either the raw or parent-resolved session ID matches. Both the raw
match and the parent-resolved match are checked to handle child
sessions from delegation.

Two tests added verifying the exclusion works and that other
sessions are still returned.

											
										
										
											2026-03-04 06:06:40 -08:00
+								                        current_session_id=self.session_id,
-												fix(gateway): Pass session_db to AIAgent, fixing session_search error

When running via the gateway (e.g. Telegram), the session_search tool
returned: {"error": "session_search must be handled by the agent loop"}

Root cause:
- gateway/run.py creates AIAgent without passing session_db=
- self._session_db is None in the agent instance
- The dispatch condition "elif function_name == 'session_search' and self._session_db"
  skips when _session_db is None, falling through to the generic error

This fix:
1. Initializes self._session_db in GatewayRunner.__init__()
2. Passes session_db to all AIAgent instantiations in gateway/run.py
3. Adds defensive fallback in run_agent.py to return a clear error when
   session_db is unavailable, instead of falling through

Fixes #105

											
										
										
											2026-02-27 00:32:17 -05:00
+								                    )
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                tool_duration = time.time() - tool_start_time
 								                if self.quiet_mode:
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								                    print(f"  {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								            elif function_name == "memory":
-												feat: integrate Honcho with USER.md memory system

When Honcho is active:
- System prompt uses Honcho prefetch instead of USER.md
- memory tool target=user add routes to Honcho
- MEMORY.md untouched in all cases

When disabled, everything works as before.

Also wires up contextTokens config to cap prefetch size.

											
										
										
											2026-02-26 18:02:24 -05:00
+								                target = function_args.get("target", "memory")
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                from tools.memory_tool import memory_tool as _memory_tool
 								                function_result = _memory_tool(
 								                    action=function_args.get("action"),
-												refactor: run Honcho and USER.md in tandem

USER.md stays in the system prompt when Honcho is active -- prefetch is
additive context, not a replacement. User observations made via the memory
tool are written to both USER.md (local) and Honcho (cross-session).

											
										
										
											2026-02-26 18:07:33 -05:00
+								                    target=target,
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                    content=function_args.get("content"),
 								                    old_text=function_args.get("old_text"),
 								                    store=self._memory_store,
 								                )
-												refactor: run Honcho and USER.md in tandem

USER.md stays in the system prompt when Honcho is active -- prefetch is
additive context, not a replacement. User observations made via the memory
tool are written to both USER.md (local) and Honcho (cross-session).

											
										
										
											2026-02-26 18:07:33 -05:00
+								                # Also send user observations to Honcho when active
-												feat: integrate Honcho with USER.md memory system

When Honcho is active:
- System prompt uses Honcho prefetch instead of USER.md
- memory tool target=user add routes to Honcho
- MEMORY.md untouched in all cases

When disabled, everything works as before.

Also wires up contextTokens config to cap prefetch size.

											
										
										
											2026-02-26 18:02:24 -05:00
+								                if self._honcho and target == "user" and function_args.get("action") == "add":
-												refactor: run Honcho and USER.md in tandem

USER.md stays in the system prompt when Honcho is active -- prefetch is
additive context, not a replacement. User observations made via the memory
tool are written to both USER.md (local) and Honcho (cross-session).

											
										
										
											2026-02-26 18:07:33 -05:00
+								                    self._honcho_save_user_observation(function_args.get("content", ""))
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                tool_duration = time.time() - tool_start_time
 								                if self.quiet_mode:
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								                    print(f"  {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								            elif function_name == "clarify":
 								                from tools.clarify_tool import clarify_tool as _clarify_tool
 								                function_result = _clarify_tool(
 								                    question=function_args.get("question", ""),
 								                    choices=function_args.get("choices"),
 								                    callback=self.clarify_callback,
 								                )
 								                tool_duration = time.time() - tool_start_time
 								                if self.quiet_mode:
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								                    print(f"  {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								            elif function_name == "delegate_task":
 								                from tools.delegate_tool import delegate_task as _delegate_task
 								                tasks_arg = function_args.get("tasks")
 								                if tasks_arg and isinstance(tasks_arg, list):
 								                    spinner_label = f"🔀 delegating {len(tasks_arg)} tasks"
 								                else:
 								                    goal_preview = (function_args.get("goal") or "")[:30]
 								                    spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating"
 								                spinner = None
 								                if self.quiet_mode:
 								                    face = random.choice(KawaiiSpinner.KAWAII_WAITING)
 								                    spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots')
 								                    spinner.start()
 								                self._delegate_spinner = spinner
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								                _delegate_result = None
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                try:
 								                    function_result = _delegate_task(
 								                        goal=function_args.get("goal"),
 								                        context=function_args.get("context"),
 								                        toolsets=function_args.get("toolsets"),
 								                        tasks=tasks_arg,
 								                        max_iterations=function_args.get("max_iterations"),
 								                        parent_agent=self,
 								                    )
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								                    _delegate_result = function_result
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                finally:
 								                    self._delegate_spinner = None
 								                    tool_duration = time.time() - tool_start_time
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								                    cute_msg = _get_cute_tool_message_impl('delegate_task', function_args, tool_duration, result=_delegate_result)
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                    if spinner:
 								                        spinner.stop(cute_msg)
 								                    elif self.quiet_mode:
 								                        print(f"  {cute_msg}")
 								            elif self.quiet_mode:
 								                face = random.choice(KawaiiSpinner.KAWAII_WAITING)
 								                tool_emoji_map = {
 								                    'web_search': '🔍', 'web_extract': '📄', 'web_crawl': '🕸️',
 								                    'terminal': '💻', 'process': '⚙️',
 								                    'read_file': '📖', 'write_file': '✍️', 'patch': '🔧', 'search_files': '🔎',
 								                    'browser_navigate': '🌐', 'browser_snapshot': '📸',
 								                    'browser_click': '👆', 'browser_type': '⌨️',
 								                    'browser_scroll': '📜', 'browser_back': '◀️',
 								                    'browser_press': '⌨️', 'browser_close': '🚪',
 								                    'browser_get_images': '🖼️', 'browser_vision': '👁️',
 								                    'image_generate': '🎨', 'text_to_speech': '🔊',
 								                    'vision_analyze': '👁️', 'mixture_of_agents': '🧠',
 								                    'skills_list': '📚', 'skill_view': '📚',
 								                    'schedule_cronjob': '⏰', 'list_cronjobs': '⏰', 'remove_cronjob': '⏰',
 								                    'send_message': '📨', 'todo': '📋', 'memory': '🧠', 'session_search': '🔍',
 								                    'clarify': '❓', 'execute_code': '🐍', 'delegate_task': '🔀',
 								                }
 								                emoji = tool_emoji_map.get(function_name, '⚡')
 								                preview = _build_tool_preview(function_name, function_args) or function_name
 								                if len(preview) > 30:
 								                    preview = preview[:27] + "..."
 								                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots')
 								                spinner.start()
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								                _spinner_result = None
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                try:
-												fix: wire up enabled_tools in agent loop + simplify sandbox tool selection

Completes the fix started in 8318a51 — handle_function_call() accepted
enabled_tools but run_agent.py never passed it. Now both call sites in
_execute_tool_calls() pass self.valid_tool_names, so each agent session
uses its own tool list instead of the process-global
_last_resolved_tool_names (which subagents can overwrite).

Also simplifies the redundant ternary in code_execution_tool.py:
sandbox_tools is already computed correctly (intersection with session
tools, or full SANDBOX_ALLOWED_TOOLS as fallback), so the conditional
was dead logic.

Inspired by PR #663 (JasonOA888). Closes #662.
Tests: 2857 passed.

											
										
										
											2026-03-10 06:35:28 -07:00
+								                    function_result = handle_function_call(
 								                        function_name, function_args, effective_task_id,
 								                        enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None,
 								                    )
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								                    _spinner_result = function_result
-												Cherry-pick 6 bug fixes from PR #76 and update documentation

Code fixes (run_agent.py):
- Fix off-by-one in _flush_messages_to_session_db skipping one message per flush
- Add clear_interrupt() to 3 early-return paths preventing stale interrupt state
- Wrap handle_function_call in try/except so tool crashes don't kill the conversation
- Replace fragile `is` identity check with _flush_sentinel marker for memory flush cleanup
- Fix retry loop off-by-one (6 attempts not 7)
- Remove redundant inline `import re`

											
										
										
											2026-02-27 03:21:49 -08:00
+								                except Exception as tool_error:
 								                    function_result = f"Error executing tool '{function_name}': {tool_error}"
-												feat(logging): implement persistent error logging for tool failures

- Introduce a separate error log for capturing warnings and errors related to tool execution, ensuring detailed inspection of issues post-failure.
- Enhance error handling in the AIAgent class to log exceptions with stack traces for better debugging.
- Add a similar error logging mechanism in the gateway to streamline debugging processes.

											
										
										
											2026-02-28 22:49:58 -08:00
+								                    logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                finally:
 								                    tool_duration = time.time() - tool_start_time
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								                    cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result)
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                    spinner.stop(cute_msg)
 								            else:
-												Cherry-pick 6 bug fixes from PR #76 and update documentation

Code fixes (run_agent.py):
- Fix off-by-one in _flush_messages_to_session_db skipping one message per flush
- Add clear_interrupt() to 3 early-return paths preventing stale interrupt state
- Wrap handle_function_call in try/except so tool crashes don't kill the conversation
- Replace fragile `is` identity check with _flush_sentinel marker for memory flush cleanup
- Fix retry loop off-by-one (6 attempts not 7)
- Remove redundant inline `import re`

											
										
										
											2026-02-27 03:21:49 -08:00
+								                try:
-												fix: wire up enabled_tools in agent loop + simplify sandbox tool selection

Completes the fix started in 8318a51 — handle_function_call() accepted
enabled_tools but run_agent.py never passed it. Now both call sites in
_execute_tool_calls() pass self.valid_tool_names, so each agent session
uses its own tool list instead of the process-global
_last_resolved_tool_names (which subagents can overwrite).

Also simplifies the redundant ternary in code_execution_tool.py:
sandbox_tools is already computed correctly (intersection with session
tools, or full SANDBOX_ALLOWED_TOOLS as fallback), so the conditional
was dead logic.

Inspired by PR #663 (JasonOA888). Closes #662.
Tests: 2857 passed.

											
										
										
											2026-03-10 06:35:28 -07:00
+								                    function_result = handle_function_call(
 								                        function_name, function_args, effective_task_id,
 								                        enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None,
 								                    )
-												Cherry-pick 6 bug fixes from PR #76 and update documentation

Code fixes (run_agent.py):
- Fix off-by-one in _flush_messages_to_session_db skipping one message per flush
- Add clear_interrupt() to 3 early-return paths preventing stale interrupt state
- Wrap handle_function_call in try/except so tool crashes don't kill the conversation
- Replace fragile `is` identity check with _flush_sentinel marker for memory flush cleanup
- Fix retry loop off-by-one (6 attempts not 7)
- Remove redundant inline `import re`

											
										
										
											2026-02-27 03:21:49 -08:00
+								                except Exception as tool_error:
 								                    function_result = f"Error executing tool '{function_name}': {tool_error}"
-												feat(logging): implement persistent error logging for tool failures

- Introduce a separate error log for capturing warnings and errors related to tool execution, ensuring detailed inspection of issues post-failure.
- Enhance error handling in the AIAgent class to log exceptions with stack traces for better debugging.
- Add a similar error logging mechanism in the gateway to streamline debugging processes.

											
										
										
											2026-02-28 22:49:58 -08:00
+								                    logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                tool_duration = time.time() - tool_start_time
 								            result_preview = function_result[:200] if len(function_result) > 200 else function_result
-												feat(logging): implement persistent error logging for tool failures

- Introduce a separate error log for capturing warnings and errors related to tool execution, ensuring detailed inspection of issues post-failure.
- Enhance error handling in the AIAgent class to log exceptions with stack traces for better debugging.
- Add a similar error logging mechanism in the gateway to streamline debugging processes.

											
										
										
											2026-02-28 22:49:58 -08:00
+								            # Log tool errors to the persistent error log so [error] tags
 								            # in the UI always have a corresponding detailed entry on disk.
 								            _is_error_result, _ = _detect_tool_failure(function_name, function_result)
 								            if _is_error_result:
 								                logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								            if self.verbose_logging:
 								                logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
 								                logging.debug(f"Tool result preview: {result_preview}...")
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								            # Guard against tools returning absurdly large content that would
 								            # blow up the context window. 100K chars ≈ 25K tokens — generous
 								            # enough for any reasonable tool output but prevents catastrophic
 								            # context explosions (e.g. accidental base64 image dumps).
 								            MAX_TOOL_RESULT_CHARS = 100_000
 								            if len(function_result) > MAX_TOOL_RESULT_CHARS:
 								                original_len = len(function_result)
 								                function_result = (
 								                    function_result[:MAX_TOOL_RESULT_CHARS]
 								                    + f"\n\n[Truncated: tool response was {original_len:,} chars, "
 								                    f"exceeding the {MAX_TOOL_RESULT_CHARS:,} char limit]"
 								                )
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								            tool_msg = {
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                "role": "tool",
 								                "content": function_result,
 								                "tool_call_id": tool_call.id
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								            }
 								            messages.append(tool_msg)
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
 								            if not self.quiet_mode:
 								                response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
 								                print(f"  ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}")
 								            if self._interrupt_requested and i < len(assistant_message.tool_calls):
 								                remaining = len(assistant_message.tool_calls) - i
 								                print(f"{self.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)")
 								                for skipped_tc in assistant_message.tool_calls[i:]:
-												fix: add context to interruption messages for model awareness

When the agent is interrupted, the model now receives descriptive
context instead of a generic 'Operation interrupted.' string:

- Tool skip messages include the tool name:
  '[Tool execution cancelled — terminal was skipped due to user interrupt]'
  '[Tool execution skipped — web_search was not started. User sent a new message]'

- API call interrupts include timing:
  'Operation interrupted: waiting for model response (4.2s elapsed).'

- Retry/error interrupts include retry context:
  'Operation interrupted: retrying API call after rate limit (retry 2/5).'
  'Operation interrupted: handling API error (Timeout: connection timed out).'

This helps the model understand what was happening when it was
interrupted, reducing wasted iterations spent re-discovering state.

											
										
										
											2026-03-08 18:58:23 -07:00
+								                    skipped_name = skipped_tc.function.name
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                    skip_msg = {
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                        "role": "tool",
-												fix: add context to interruption messages for model awareness

When the agent is interrupted, the model now receives descriptive
context instead of a generic 'Operation interrupted.' string:

- Tool skip messages include the tool name:
  '[Tool execution cancelled — terminal was skipped due to user interrupt]'
  '[Tool execution skipped — web_search was not started. User sent a new message]'

- API call interrupts include timing:
  'Operation interrupted: waiting for model response (4.2s elapsed).'

- Retry/error interrupts include retry context:
  'Operation interrupted: retrying API call after rate limit (retry 2/5).'
  'Operation interrupted: handling API error (Timeout: connection timed out).'

This helps the model understand what was happening when it was
interrupted, reducing wasted iterations spent re-discovering state.

											
										
										
											2026-03-08 18:58:23 -07:00
+								                        "content": f"[Tool execution skipped — {skipped_name} was not started. User sent a new message]",
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                        "tool_call_id": skipped_tc.id
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                    }
 								                    messages.append(skip_msg)
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                break
 								            if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
 								                time.sleep(self.tool_delay)
-												feat: iteration budget pressure via tool result injection

Two-tier warning system that nudges the LLM as it approaches
max_iterations, injected into the last tool result JSON rather
than as a separate system message:

- Caution (70%): {"_budget_warning": "[BUDGET: 42/60...]"}
- Warning (90%): {"_budget_warning": "[BUDGET WARNING: 54/60...]"}

For JSON tool results, adds a _budget_warning field to the existing
dict. For plain text results, appends the warning as text.

Key properties:
- No system messages injected mid-conversation
- No changes to message structure
- Prompt cache stays valid
- Configurable thresholds (0.7 / 0.9)
- Can be disabled: _budget_pressure_enabled = False

Inspired by PR #421 (@Bartok9) and issue #414.
8 tests covering thresholds, edge cases, JSON and text injection.

											
										
										
											2026-03-09 05:19:42 -07:00
+								        # ── Budget pressure injection ─────────────────────────────────
 								        # After all tool calls in this turn are processed, check if we're
 								        # approaching max_iterations. If so, inject a warning into the LAST
 								        # tool result's JSON so the LLM sees it naturally when reading results.
 								        budget_warning = self._get_budget_warning(api_call_count)
 								        if budget_warning and messages and messages[-1].get("role") == "tool":
 								            last_content = messages[-1]["content"]
 								            try:
 								                parsed = json.loads(last_content)
 								                if isinstance(parsed, dict):
 								                    parsed["_budget_warning"] = budget_warning
 								                    messages[-1]["content"] = json.dumps(parsed, ensure_ascii=False)
 								                else:
 								                    messages[-1]["content"] = last_content + f"\n\n{budget_warning}"
 								            except (json.JSONDecodeError, TypeError):
 								                messages[-1]["content"] = last_content + f"\n\n{budget_warning}"
 								            if not self.quiet_mode:
 								                remaining = self.max_iterations - api_call_count
 								                tier = "⚠️  WARNING" if remaining <= self.max_iterations * 0.1 else "💡 CAUTION"
 								                print(f"{self.log_prefix}{tier}: {remaining} iterations remaining")
 								    def _get_budget_warning(self, api_call_count: int) -> Optional[str]:
 								        """Build the iteration-budget nudge for the given API call count.
 								        Returns None when budget pressure is disabled, when max_iterations
 								        is not positive, or when usage is below both thresholds. Otherwise
 								        returns one of two bracketed messages:
 								          - "BUDGET WARNING" once usage reaches _budget_warning_threshold
 								          - "BUDGET" once usage reaches _budget_caution_threshold
 								        Usage is measured as api_call_count / max_iterations.
 								        """
 								        if not self._budget_pressure_enabled:
 								            return None
 								        limit = self.max_iterations
 								        if limit <= 0:
 								            # Also avoids dividing by zero below.
 								            return None
 								        used_fraction = api_call_count / limit
 								        left = limit - api_call_count
 								        # The urgent tier is checked first, so it takes precedence
 								        # whenever its threshold is met.
 								        if used_fraction >= self._budget_warning_threshold:
 								            urgent = (
 								                f"[BUDGET WARNING: Iteration {api_call_count}/{limit}. "
 								                f"Only {left} iteration(s) left. "
 								                "Provide your final response NOW. No more tool calls unless absolutely critical.]"
 								            )
 								            return urgent
 								        if used_fraction >= self._budget_caution_threshold:
 								            nudge = (
 								                f"[BUDGET: Iteration {api_call_count}/{limit}. "
 								                f"{left} iterations left. Start consolidating your work.]"
 								            )
 								            return nudge
 								        return None
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								    def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
 								        """Request a summary when max iterations are reached. Returns the final response text."""
 								        print(f"⚠️  Reached maximum iterations ({self.max_iterations}). Requesting summary...")
 								        summary_request = (
 								            "You've reached the maximum number of tool-calling iterations allowed. "
 								            "Please provide a final response summarizing what you've found and accomplished so far, "
 								            "without calling any more tools."
 								        )
 								        messages.append({"role": "user", "content": summary_request})
 								        try:
-												fix: strip internal fields from API messages in _handle_max_iterations

The flush_memories() and run_conversation() code paths already stripped
finish_reason and reasoning from API messages (added in 7a0b377 via PR
#253), but _handle_max_iterations() was missed. It was sending raw
messages.copy() which could include finish_reason, causing 422 errors
on strict APIs like Mistral when the agent hit max iterations.

Now strips the same internal fields consistently across all three API
call sites.

											
										
										
											2026-03-04 21:08:20 -08:00
+								            # Build API messages, stripping internal-only fields
 								            # (finish_reason, reasoning) that strict APIs like Mistral reject with 422
 								            api_messages = []
 								            for msg in messages:
 								                api_msg = msg.copy()
 								                for internal_field in ("reasoning", "finish_reason"):
 								                    api_msg.pop(internal_field, None)
 								                api_messages.append(api_msg)
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								            effective_system = self._cached_system_prompt or ""
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								            if self.ephemeral_system_prompt:
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								                effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
 								            if effective_system:
 								                api_messages = [{"role": "system", "content": effective_system}] + api_messages
 								            if self.prefill_messages:
 								                sys_offset = 1 if effective_system else 0
 								                for idx, pfm in enumerate(self.prefill_messages):
 								                    api_messages.insert(sys_offset + idx, pfm.copy())
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
 								            summary_extra_body = {}
-												refactor: streamline reasoning configuration checks in AIAgent

- Simplified the logic for determining support for reasoning based on the base URL by introducing clearer variable names.
- Added product attribution for the Nous Portal to the extra body of requests when applicable, enhancing tagging for better tracking.

											
										
										
											2026-02-25 16:49:41 -08:00
+								            _is_openrouter = "openrouter" in self.base_url.lower()
 								            _is_nous = "nousresearch" in self.base_url.lower()
 								            if _is_openrouter or _is_nous:
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                if self.reasoning_config is not None:
 								                    summary_extra_body["reasoning"] = self.reasoning_config
 								                else:
 								                    summary_extra_body["reasoning"] = {
 								                        "enabled": True,
-												feat: default reasoning effort from xhigh to medium

Reduces token usage and latency for most tasks by defaulting to
medium reasoning effort instead of xhigh. Users can still override
via config or CLI flag. Updates code, tests, example config, and docs.

											
										
										
											2026-03-07 10:14:19 -08:00
+								                        "effort": "medium"
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                    }
-												refactor: streamline reasoning configuration checks in AIAgent

- Simplified the logic for determining support for reasoning based on the base URL by introducing clearer variable names.
- Added product attribution for the Nous Portal to the extra body of requests when applicable, enhancing tagging for better tracking.

											
										
										
											2026-02-25 16:49:41 -08:00
+								            if _is_nous:
 								                summary_extra_body["tags"] = ["product=hermes-agent"]
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								            if self.api_mode == "codex_responses":
 								                codex_kwargs = self._build_api_kwargs(api_messages)
-												fix: NoneType not iterable error when summarizing at max iterations

In _handle_max_iterations, the codex_responses path set tools=None to
prevent tool calls during summarization. However, the OpenAI SDK's
_make_tools() treats None as a valid value (not its Omit sentinel) and
tries to iterate over it, causing TypeError: 'NoneType' object is not
iterable.

Fix: use codex_kwargs.pop('tools', None) to remove the key entirely,
so the SDK never receives it and uses its default omit behavior.

Fixes #300

											
										
										
											2026-03-03 03:42:44 -08:00
+								                codex_kwargs.pop("tools", None)
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								                summary_response = self._run_codex_stream(codex_kwargs)
 								                assistant_message, _ = self._normalize_codex_response(summary_response)
 								                final_response = (assistant_message.content or "").strip() if assistant_message else ""
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								            else:
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                summary_kwargs = {
 								                    "model": self.model,
 								                    "messages": api_messages,
 								                }
 								                if self.max_tokens is not None:
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								                    summary_kwargs.update(self._max_tokens_param(self.max_tokens))
-												feat(provider-routing): add OpenRouter provider routing configuration

Introduced a new `provider_routing` section in the CLI configuration to control how requests are routed across providers when using OpenRouter. This includes options for sorting providers by throughput, latency, or price, as well as allowing or ignoring specific providers, setting the order of provider attempts, and managing data collection policies. Updated relevant classes and documentation to support these features, enhancing flexibility in provider selection.

											
										
										
											2026-03-01 18:24:27 -08:00
 								                # Include provider routing preferences
 								                provider_preferences = {}
 								                if self.providers_allowed:
 								                    provider_preferences["only"] = self.providers_allowed
 								                if self.providers_ignored:
 								                    provider_preferences["ignore"] = self.providers_ignored
 								                if self.providers_order:
 								                    provider_preferences["order"] = self.providers_order
 								                if self.provider_sort:
 								                    provider_preferences["sort"] = self.provider_sort
 								                if provider_preferences:
 								                    summary_extra_body["provider"] = provider_preferences
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                if summary_extra_body:
 								                    summary_kwargs["extra_body"] = summary_extra_body
 								                summary_response = self.client.chat.completions.create(**summary_kwargs)
 								                if summary_response.choices and summary_response.choices[0].message.content:
 								                    final_response = summary_response.choices[0].message.content
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								                else:
 								                    final_response = ""
 								            if final_response:
 								                if "<think>" in final_response:
 								                    final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
 								                if final_response:
 								                    messages.append({"role": "assistant", "content": final_response})
 								                else:
 								                    final_response = "I reached the iteration limit and couldn't generate a summary."
 								            else:
 								                # Retry summary generation
 								                if self.api_mode == "codex_responses":
 								                    codex_kwargs = self._build_api_kwargs(api_messages)
-												fix: NoneType not iterable error when summarizing at max iterations

In _handle_max_iterations, the codex_responses path set tools=None to
prevent tool calls during summarization. However, the OpenAI SDK's
_make_tools() treats None as a valid value (not its Omit sentinel) and
tries to iterate over it, causing TypeError: 'NoneType' object is not
iterable.

Fix: use codex_kwargs.pop('tools', None) to remove the key entirely,
so the SDK never receives it and uses its default omit behavior.

Fixes #300

											
										
										
											2026-03-03 03:42:44 -08:00
+								                    codex_kwargs.pop("tools", None)
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								                    retry_response = self._run_codex_stream(codex_kwargs)
 								                    retry_msg, _ = self._normalize_codex_response(retry_response)
 								                    final_response = (retry_msg.content or "").strip() if retry_msg else ""
 								                else:
 								                    summary_kwargs = {
 								                        "model": self.model,
 								                        "messages": api_messages,
 								                    }
 								                    if self.max_tokens is not None:
-												fix: use _max_tokens_param in max-iterations retry path

The retry summary in _handle_max_iterations hardcodes max_tokens instead
of calling _max_tokens_param(). For direct OpenAI API users (gpt-4o,
o-series), the correct parameter name is max_completion_tokens. The first
attempt at line 2697 already uses _max_tokens_param correctly but the
retry path at line 2743 was missed.

											
										
										
											2026-03-05 17:49:37 +03:00
+								                        summary_kwargs.update(self._max_tokens_param(self.max_tokens))
-												refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.

											
										
										
											2026-02-28 21:47:51 -08:00
+								                    if summary_extra_body:
 								                        summary_kwargs["extra_body"] = summary_extra_body
 								                    summary_response = self.client.chat.completions.create(**summary_kwargs)
 								                    if summary_response.choices and summary_response.choices[0].message.content:
 								                        final_response = summary_response.choices[0].message.content
 								                    else:
 								                        final_response = ""
 								                if final_response:
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                    if "<think>" in final_response:
 								                        final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
-												fix: add missing empty-content guard after think-block stripping in retry path

											
										
										
											2026-03-05 18:57:59 +03:00
+								                    if final_response:
 								                        messages.append({"role": "assistant", "content": final_response})
 								                    else:
 								                        final_response = "I reached the iteration limit and couldn't generate a summary."
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                else:
 								                    final_response = "I reached the iteration limit and couldn't generate a summary."
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
 								        except Exception as e:
 								            logging.warning(f"Failed to get summary response: {e}")
 								            final_response = f"I reached the maximum iterations ({self.max_iterations}) but couldn't summarize. Error: {str(e)}"
 								        return final_response
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								    def run_conversation(
-												prevent leakage of morph instances between tasks

											
										
										
											2025-11-04 03:32:43 -05:00
+								        self,
 								        user_message: str,
 								        system_message: str = None,
 								        conversation_history: List[Dict[str, Any]] = None,
 								        task_id: str = None
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								    ) -> Dict[str, Any]:
 								        """
 								        Run a complete conversation with tool calling until completion.
-												prevent leakage of morph instances between tasks

											
										
										
											2025-11-04 03:32:43 -05:00
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        Args:
 								            user_message (str): The user's message/question
-												Add ephemeral system prompt support in batch and agent runners. Update README with usage examples and documentation for the new feature. Ensure prompt is not saved to trajectories.

											
										
										
											2025-10-08 02:33:58 +00:00
+								            system_message (str): Custom system message (optional, overrides ephemeral_system_prompt if provided)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								            conversation_history (List[Dict]): Previous conversation messages (optional)
-												prevent leakage of morph instances between tasks

											
										
										
											2025-11-04 03:32:43 -05:00
+								            task_id (str): Unique identifier for this task to isolate VMs between concurrent tasks (optional, auto-generated if not provided)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        Returns:
 								            Dict: Complete conversation result with final response and message history
 								        """
-												prevent leakage of morph instances between tasks

											
										
										
											2025-11-04 03:32:43 -05:00
+								        # Generate unique task_id if not provided to isolate VMs between concurrent tasks
 								        effective_task_id = task_id or str(uuid.uuid4())
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
-												Critical: fix bug in new subagent tool call budget to not be session-level but tool call loop level

											
										
										
											2026-03-07 10:32:51 -08:00
+								        # Reset retry counters and iteration budget at the start of each turn
 								        # so subagent usage from a previous turn doesn't eat into the next one.
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								        self._invalid_tool_retries = 0
 								        self._invalid_json_retries = 0
 								        self._empty_content_retries = 0
-												fix: reset all retry counters at start of run_conversation()

_incomplete_scratchpad_retries and _codex_incomplete_retries were not
reset at the start of run_conversation(). In CLI mode, where the same
AIAgent instance is reused across conversations, stale counters from
a previous conversation could carry over, causing premature retry
exhaustion and partial responses.

											
										
										
											2026-03-07 20:12:08 +03:00
+								        self._incomplete_scratchpad_retries = 0
 								        self._codex_incomplete_retries = 0
-												feat: enhance AIAgent's tool usage nudges and content handling

- Introduced a method to strip <think> blocks from content, improving text visibility.
- Implemented counters to reset nudge intervals when memory and skill tools are used, enhancing user guidance.
- Captured content from turns with tool calls to provide fallback responses, ensuring continuity in conversation.
- Updated nudge logic to remind users about saving memories and creating skills based on interaction patterns.

											
										
										
											2026-02-22 21:33:28 -08:00
+								        self._last_content_with_tools = None
 								        self._turns_since_memory = 0
 								        self._iters_since_skill = 0
-												Critical: fix bug in new subagent tool call budget to not be session-level but tool call loop level

											
										
										
											2026-03-07 10:32:51 -08:00
+								        self.iteration_budget = IterationBudget(self.max_iterations)
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
-												fix(agent): copy conversation_history to avoid mutating caller's list

											
										
										
											2026-03-01 03:06:13 +03:00
+								        # Initialize conversation (copy to avoid mutating the caller's list)
 								        messages = list(conversation_history) if conversation_history else []
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
-												Add todo tool for agent task planning and management

Single `todo` tool that reads (no params) or writes (provide todos array
with merge flag). In-memory TodoStore on AIAgent, no system prompt
mutation, behavioral guidance in tool description only. State re-injected
after context compression events. Gateway sessions hydrate from
conversation history. Added to all platform toolsets.

Also wired into RL agent_loop.py with per-run TodoStore and fixed
browser_snapshot user_task passthrough from first user message.

											
										
										
											2026-02-17 17:02:33 -08:00
+								        # Hydrate todo store from conversation history (gateway creates a fresh
 								        # AIAgent per message, so the in-memory store is empty -- we need to
 								        # recover the todo state from the most recent todo tool response in history)
 								        if conversation_history and not self._todo_store.has_items():
 								            self._hydrate_todo_store(conversation_history)
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								        # Prefill messages (few-shot priming) are injected at API-call time only,
 								        # never stored in the messages list. This keeps them ephemeral: they won't
 								        # be saved to session DB, session logs, or batch trajectories, but they're
 								        # automatically re-applied on every API call (including session continuations).
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
+								        # Track user turns for memory flush and periodic nudge logic
 								        self._user_turn_count += 1
-												feat: add Honcho AI-native memory integration

Opt-in persistent cross-session user modeling via Honcho. Reads
~/.honcho/config.json as single source of truth (shared with
Claude Code, Cursor, and other Honcho-enabled tools). Zero impact
when disabled or unconfigured.

- honcho_integration/ package (client, session manager, peer resolution)
- Host-based config resolution matching claude-honcho/cursor-honcho pattern
- Prefetch user context into system prompt per conversation turn
- Sync user/assistant messages to Honcho after each exchange
- query_user_context tool for mid-conversation dialectic reasoning
- Gated activation: requires ~/.honcho/config.json with enabled=true

											
										
										
											2026-02-25 19:34:25 -05:00
+								        # Preserve the original user message before nudge injection.
 								        # Honcho should receive the actual user input, not system nudges.
 								        original_user_message = user_message
-												feat: enhance AIAgent's tool usage nudges and content handling

- Introduced a method to strip <think> blocks from content, improving text visibility.
- Implemented counters to reset nudge intervals when memory and skill tools are used, enhancing user guidance.
- Captured content from turns with tool calls to provide fallback responses, ensuring continuity in conversation.
- Updated nudge logic to remind the agent about saving memories and creating skills based on interaction patterns.

											
										
										
											2026-02-22 21:33:28 -08:00
+								        # Periodic memory nudge: remind the model to consider saving memories.
 								        # Counter resets whenever the memory tool is actually used.
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
+								        if (self._memory_nudge_interval > 0
 								                and "memory" in self.valid_tool_names
 								                and self._memory_store):
-												feat: enhance AIAgent's tool usage nudges and content handling

- Introduced a method to strip <think> blocks from content, improving text visibility.
- Implemented counters to reset nudge intervals when memory and skill tools are used, enhancing user guidance.
- Captured content from turns with tool calls to provide fallback responses, ensuring continuity in conversation.
- Updated nudge logic to remind the agent about saving memories and creating skills based on interaction patterns.

											
										
										
											2026-02-22 21:33:28 -08:00
+								            self._turns_since_memory += 1
 								            if self._turns_since_memory >= self._memory_nudge_interval:
 								                user_message += (
 								                    "\n\n[System: You've had several exchanges in this session. "
 								                    "Consider whether there's anything worth saving to your memories.]"
 								                )
 								                self._turns_since_memory = 0
 								        # Skill creation nudge: fires on the first user message after a long tool loop.
 								        # The counter increments per API iteration in the tool loop and is checked here.
 								        if (self._skill_nudge_interval > 0
 								                and self._iters_since_skill >= self._skill_nudge_interval
 								                and "skill_manage" in self.valid_tool_names):
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
+								            user_message += (
-												feat: enhance AIAgent's tool usage nudges and content handling

- Introduced a method to strip <think> blocks from content, improving text visibility.
- Implemented counters to reset nudge intervals when memory and skill tools are used, enhancing user guidance.
- Captured content from turns with tool calls to provide fallback responses, ensuring continuity in conversation.
- Updated nudge logic to remind the agent about saving memories and creating skills based on interaction patterns.

											
										
										
											2026-02-22 21:33:28 -08:00
+								                "\n\n[System: The previous task involved many steps. "
 								                "If you discovered a reusable workflow, consider saving it as a skill.]"
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
+								            )
-												feat: enhance AIAgent's tool usage nudges and content handling

- Introduced a method to strip <think> blocks from content, improving text visibility.
- Implemented counters to reset nudge intervals when memory and skill tools are used, enhancing user guidance.
- Captured content from turns with tool calls to provide fallback responses, ensuring continuity in conversation.
- Updated nudge logic to remind the agent about saving memories and creating skills based on interaction patterns.

											
										
										
											2026-02-22 21:33:28 -08:00
+								            self._iters_since_skill = 0
-												feat: enhance memory management features in AIAgent and CLI

- Added configuration options for memory nudge interval and flush minimum turns in cli-config.yaml.example.
- Implemented memory flushing before conversation reset, clearing, and exit in the CLI to ensure memories are saved.
- Introduced a flush_memories method in AIAgent to handle memory persistence before context loss.
- Added periodic nudges to remind the agent to consider saving memories based on user interactions.

											
										
										
											2026-02-22 10:15:17 -08:00
-												fix: stabilize system prompt across gateway turns for cache hits

Two changes to prevent unnecessary Anthropic prompt cache misses in the
gateway, where a fresh AIAgent is created per user message:

1. Reuse stored system prompt for continuing sessions:
   When conversation_history is non-empty, load the system prompt from
   the session DB instead of rebuilding from disk. The model already has
   updated memory in its conversation history (it wrote it!), so
   re-reading memory from disk produces a different system prompt that
   breaks the cache prefix.

2. Stabilize Honcho context per session:
   - Only prefetch Honcho context on the first turn (empty history)
   - Bake Honcho context into the cached system prompt and store to DB
   - Remove the per-turn Honcho injection from the API call loop

   This ensures the system message is identical across all turns in a
   session. Previously, re-fetching Honcho could return different context
   on each turn, changing the system message and invalidating the cache.

Both changes preserve the existing behavior for compression (which
invalidates the prompt and rebuilds from scratch) and for the CLI
(where the same AIAgent persists and the cached prompt is already
stable across turns).

Tests: 2556 passed (6 new)

											
										
										
											2026-03-09 01:50:58 -07:00
+								        # Honcho prefetch: retrieve user context for system prompt injection.
 								        # Only on the FIRST turn of a session (empty history).  On subsequent
 								        # turns the model already has all prior context in its conversation
 								        # history, and the Honcho context is baked into the stored system
 								        # prompt — re-fetching it would change the system message and break
 								        # Anthropic prompt caching.
-												feat: add Honcho AI-native memory integration

Opt-in persistent cross-session user modeling via Honcho. Reads
~/.honcho/config.json as single source of truth (shared with
Claude Code, Cursor, and other Honcho-enabled tools). Zero impact
when disabled or unconfigured.

- honcho_integration/ package (client, session manager, peer resolution)
- Host-based config resolution matching claude-honcho/cursor-honcho pattern
- Prefetch user context into system prompt per conversation turn
- Sync user/assistant messages to Honcho after each exchange
- query_user_context tool for mid-conversation dialectic reasoning
- Gated activation: requires ~/.honcho/config.json with enabled=true

											
										
										
											2026-02-25 19:34:25 -05:00
+								        self._honcho_context = ""
-												fix: stabilize system prompt across gateway turns for cache hits

Two changes to prevent unnecessary Anthropic prompt cache misses in the
gateway, where a fresh AIAgent is created per user message:

1. Reuse stored system prompt for continuing sessions:
   When conversation_history is non-empty, load the system prompt from
   the session DB instead of rebuilding from disk. The model already has
   updated memory in its conversation history (it wrote it!), so
   re-reading memory from disk produces a different system prompt that
   breaks the cache prefix.

2. Stabilize Honcho context per session:
   - Only prefetch Honcho context on the first turn (empty history)
   - Bake Honcho context into the cached system prompt and store to DB
   - Remove the per-turn Honcho injection from the API call loop

   This ensures the system message is identical across all turns in a
   session. Previously, re-fetching Honcho could return different context
   on each turn, changing the system message and invalidating the cache.

Both changes preserve the existing behavior for compression (which
invalidates the prompt and rebuilds from scratch) and for the CLI
(where the same AIAgent persists and the cached prompt is already
stable across turns).

Tests: 2556 passed (6 new)

											
										
										
											2026-03-09 01:50:58 -07:00
+								        if self._honcho and self._honcho_session_key and not conversation_history:
-												feat: add Honcho AI-native memory integration

Opt-in persistent cross-session user modeling via Honcho. Reads
~/.honcho/config.json as single source of truth (shared with
Claude Code, Cursor, and other Honcho-enabled tools). Zero impact
when disabled or unconfigured.

- honcho_integration/ package (client, session manager, peer resolution)
- Host-based config resolution matching claude-honcho/cursor-honcho pattern
- Prefetch user context into system prompt per conversation turn
- Sync user/assistant messages to Honcho after each exchange
- query_user_context tool for mid-conversation dialectic reasoning
- Gated activation: requires ~/.honcho/config.json with enabled=true

											
										
										
											2026-02-25 19:34:25 -05:00
+								            try:
 								                self._honcho_context = self._honcho_prefetch(user_message)
 								            except Exception as e:
 								                logger.debug("Honcho prefetch failed (non-fatal): %s", e)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        # Add user message
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								        user_msg = {"role": "user", "content": user_message}
 								        messages.append(user_msg)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								        if not self.quiet_mode:
 								            print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        # ── System prompt (cached per session for prefix caching) ──
 								        # Built once on first call, reused for all subsequent calls.
 								        # Only rebuilt after context compression events (which invalidate
 								        # the cache and reload memory from disk).
-												fix: stabilize system prompt across gateway turns for cache hits

Two changes to prevent unnecessary Anthropic prompt cache misses in the
gateway, where a fresh AIAgent is created per user message:

1. Reuse stored system prompt for continuing sessions:
   When conversation_history is non-empty, load the system prompt from
   the session DB instead of rebuilding from disk. The model already has
   updated memory in its conversation history (it wrote it!), so
   re-reading memory from disk produces a different system prompt that
   breaks the cache prefix.

2. Stabilize Honcho context per session:
   - Only prefetch Honcho context on the first turn (empty history)
   - Bake Honcho context into the cached system prompt and store to DB
   - Remove the per-turn Honcho injection from the API call loop

   This ensures the system message is identical across all turns in a
   session. Previously, re-fetching Honcho could return different context
   on each turn, changing the system message and invalidating the cache.

Both changes preserve the existing behavior for compression (which
invalidates the prompt and rebuilds from scratch) and for the CLI
(where the same AIAgent persists and the cached prompt is already
stable across turns).

Tests: 2556 passed (6 new)

											
										
										
											2026-03-09 01:50:58 -07:00
+								        #
 								        # For continuing sessions (gateway creates a fresh AIAgent per
 								        # message), we load the stored system prompt from the session DB
 								        # instead of rebuilding.  Rebuilding would pick up memory changes
 								        # from disk that the model already knows about (it wrote them!),
 								        # producing a different system prompt and breaking the Anthropic
 								        # prefix cache.
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        if self._cached_system_prompt is None:
-												fix: stabilize system prompt across gateway turns for cache hits

Two changes to prevent unnecessary Anthropic prompt cache misses in the
gateway, where a fresh AIAgent is created per user message:

1. Reuse stored system prompt for continuing sessions:
   When conversation_history is non-empty, load the system prompt from
   the session DB instead of rebuilding from disk. The model already has
   updated memory in its conversation history (it wrote it!), so
   re-reading memory from disk produces a different system prompt that
   breaks the cache prefix.

2. Stabilize Honcho context per session:
   - Only prefetch Honcho context on the first turn (empty history)
   - Bake Honcho context into the cached system prompt and store to DB
   - Remove the per-turn Honcho injection from the API call loop

   This ensures the system message is identical across all turns in a
   session. Previously, re-fetching Honcho could return different context
   on each turn, changing the system message and invalidating the cache.

Both changes preserve the existing behavior for compression (which
invalidates the prompt and rebuilds from scratch) and for the CLI
(where the same AIAgent persists and the cached prompt is already
stable across turns).

Tests: 2556 passed (6 new)

											
										
										
											2026-03-09 01:50:58 -07:00
+								            stored_prompt = None
 								            if conversation_history and self._session_db:
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								                try:
-												fix: stabilize system prompt across gateway turns for cache hits

Two changes to prevent unnecessary Anthropic prompt cache misses in the
gateway, where a fresh AIAgent is created per user message:

1. Reuse stored system prompt for continuing sessions:
   When conversation_history is non-empty, load the system prompt from
   the session DB instead of rebuilding from disk. The model already has
   updated memory in its conversation history (it wrote it!), so
   re-reading memory from disk produces a different system prompt that
   breaks the cache prefix.

2. Stabilize Honcho context per session:
   - Only prefetch Honcho context on the first turn (empty history)
   - Bake Honcho context into the cached system prompt and store to DB
   - Remove the per-turn Honcho injection from the API call loop

   This ensures the system message is identical across all turns in a
   session. Previously, re-fetching Honcho could return different context
   on each turn, changing the system message and invalidating the cache.

Both changes preserve the existing behavior for compression (which
invalidates the prompt and rebuilds from scratch) and for the CLI
(where the same AIAgent persists and the cached prompt is already
stable across turns).

Tests: 2556 passed (6 new)

											
										
										
											2026-03-09 01:50:58 -07:00
+								                    session_row = self._session_db.get_session(self.session_id)
 								                    if session_row:
 								                        stored_prompt = session_row.get("system_prompt") or None
 								                except Exception:
 								                    pass  # Fall through to build fresh
 								            if stored_prompt:
 								                # Continuing session — reuse the exact system prompt from
 								                # the previous turn so the Anthropic cache prefix matches.
 								                self._cached_system_prompt = stored_prompt
 								            else:
 								                # First turn of a new session — build from scratch.
 								                self._cached_system_prompt = self._build_system_prompt(system_message)
 								                # Bake Honcho context into the prompt so it's stable for
 								                # the entire session (not re-fetched per turn).
 								                if self._honcho_context:
 								                    self._cached_system_prompt = (
 								                        self._cached_system_prompt + "\n\n" + self._honcho_context
 								                    ).strip()
 								                # Store the system prompt snapshot in SQLite
 								                if self._session_db:
 								                    try:
 								                        self._session_db.update_system_prompt(self.session_id, self._cached_system_prompt)
 								                    except Exception as e:
 								                        logger.debug("Session DB update_system_prompt failed: %s", e)
-												Add platform-specific formatting hints and identity for AIAgent

- Introduced a default agent identity prompt to ensure consistent behavior across platforms.
- Added platform-specific formatting hints for CLI, WhatsApp, Telegram, and Discord to guide the agent's output style.
- Updated the AIAgent initialization to accept a platform parameter, enhancing adaptability to different interfaces.

											
										
										
											2026-02-12 16:11:16 -08:00
-												feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200/1375 chars), § delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments

											
										
										
											2026-02-19 00:57:31 -08:00
+								        active_system_prompt = self._cached_system_prompt
-												Add platform-specific formatting hints and identity for AIAgent

- Introduced a default agent identity prompt to ensure consistent behavior across platforms.
- Added platform-specific formatting hints for CLI, WhatsApp, Telegram, and Discord to guide the agent's output style.
- Updated the AIAgent initialization to accept a platform parameter, enhancing adaptability to different interfaces.

											
										
										
											2026-02-12 16:11:16 -08:00
-												fix: preflight context compression + error handler ordering for model switches

Two fixes for the case where a user switches to a model with a smaller
context window while having a large existing session:

1. Preflight compression in run_conversation(): Before the main loop,
   estimate tokens of loaded history + system prompt. If it exceeds the
   model's compression threshold (85% of context), compress proactively
   with up to 3 passes. This naturally handles model switches because
   the gateway creates a fresh AIAgent per message with the current
   model's context length.

2. Error handler reordering: Context-length errors (400 with 'maximum
   context length' etc.) are now checked BEFORE the generic 4xx handler.
   Previously, OpenRouter's 400-status context-length errors were caught
   as non-retryable client errors and aborted immediately, never reaching
   the compression+retry logic.

Reported by Sonicrida on Discord: 840-message session (2MB+) crashed
after switching from a large-context model to minimax via OpenRouter.

											
										
										
											2026-03-04 14:42:41 -08:00
+								        # ── Preflight context compression ──
 								        # Before entering the main loop, check if the loaded conversation
 								        # history already exceeds the model's context threshold.  This handles
 								        # cases where a user switches to a model with a smaller context window
 								        # while having a large existing session — compress proactively rather
 								        # than waiting for an API error (which might be caught as a non-retryable
 								        # 4xx and abort the request entirely).
 								        if (
 								            self.compression_enabled
 								            and len(messages) > self.context_compressor.protect_first_n
 								                                + self.context_compressor.protect_last_n + 1
 								        ):
 								            _sys_tok_est = estimate_tokens_rough(active_system_prompt or "")
 								            _msg_tok_est = estimate_messages_tokens_rough(messages)
 								            _preflight_tokens = _sys_tok_est + _msg_tok_est
 								            if _preflight_tokens >= self.context_compressor.threshold_tokens:
 								                logger.info(
 								                    "Preflight compression: ~%s tokens >= %s threshold (model %s, ctx %s)",
 								                    f"{_preflight_tokens:,}",
 								                    f"{self.context_compressor.threshold_tokens:,}",
 								                    self.model,
 								                    f"{self.context_compressor.context_length:,}",
 								                )
 								                if not self.quiet_mode:
 								                    print(
 								                        f"📦 Preflight compression: ~{_preflight_tokens:,} tokens "
 								                        f">= {self.context_compressor.threshold_tokens:,} threshold"
 								                    )
 								                # May need multiple passes for very large sessions with small
 								                # context windows (each pass summarises the middle N turns).
 								                for _pass in range(3):
 								                    _orig_len = len(messages)
 								                    messages, active_system_prompt = self._compress_context(
-												fix: detect and warn on file re-read loops after context compression

When context compression summarizes conversation history, the agent
loses track of which files it already read and re-reads them in a loop.
Users report the agent reading the same files endlessly without writing.

Root cause: context compression is lossy — file contents and read history
are lost in the summary. After compression, the model thinks it hasn't
examined the files yet and reads them again.

Fix (two-part):
1. Track file reads per task in file_tools.py. When the same file region
   is read again, include a _warning in the response telling the model
   to stop re-reading and use existing information.
2. After context compression, inject a structured message listing all
   files already read in the session with explicit "do NOT re-read"
   instruction, preserving read history across compression boundaries.

Adds 16 tests covering warning detection, task isolation, summary
accuracy, tracker cleanup, and compression history injection.

											
										
										
											2026-03-08 20:44:42 +03:00
+								                        messages, system_message, approx_tokens=_preflight_tokens,
 								                        task_id=effective_task_id,
-												fix: preflight context compression + error handler ordering for model switches

Two fixes for the case where a user switches to a model with a smaller
context window while having a large existing session:

1. Preflight compression in run_conversation(): Before the main loop,
   estimate tokens of loaded history + system prompt. If it exceeds the
   model's compression threshold (85% of context), compress proactively
   with up to 3 passes. This naturally handles model switches because
   the gateway creates a fresh AIAgent per message with the current
   model's context length.

2. Error handler reordering: Context-length errors (400 with 'maximum
   context length' etc.) are now checked BEFORE the generic 4xx handler.
   Previously, OpenRouter's 400-status context-length errors were caught
   as non-retryable client errors and aborted immediately, never reaching
   the compression+retry logic.

Reported by Sonicrida on Discord: 840-message session (2MB+) crashed
after switching from a large-context model to minimax via OpenRouter.

											
										
										
											2026-03-04 14:42:41 -08:00
+								                    )
 								                    if len(messages) >= _orig_len:
 								                        break  # Cannot compress further
 								                    # Re-estimate after compression
 								                    _sys_tok_est = estimate_tokens_rough(active_system_prompt or "")
 								                    _msg_tok_est = estimate_messages_tokens_rough(messages)
 								                    _preflight_tokens = _sys_tok_est + _msg_tok_est
 								                    if _preflight_tokens < self.context_compressor.threshold_tokens:
 								                        break  # Under threshold
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        # Main conversation loop
 								        api_call_count = 0
 								        final_response = None
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								        interrupted = False
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
+								        codex_ack_continuations = 0
-												fix: continue after output-length truncation

											
										
										
											2026-03-07 18:45:17 -05:00
+								        length_continue_retries = 0
 								        truncated_response_prefix = ""
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
 								        # Clear any stale interrupt state at start
 								        self.clear_interrupt()
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
-												feat: shared iteration budget across parent + subagents

Subagent tool calls now count toward the same session-wide iteration
limit as the parent agent. Previously, each subagent had its own
independent counter, so a parent with max_iterations=60 could spawn
3 subagents each doing 50 calls = 150 total tool calls unmetered.

Changes:
- IterationBudget: thread-safe shared counter (run_agent.py)
  - consume(): try to use one iteration, returns False if exhausted
  - refund(): give back one iteration (for execute_code turns)
  - Thread-safe via Lock (subagents run in ThreadPoolExecutor)
- Parent creates the budget, children inherit it via delegate_tool.py
- execute_code turns are refunded (don't count against budget)
- Default raised from 60 → 90 to account for shared consumption
- Per-child cap (50) still applies as a safety valve

The per-child max_iterations (default 50) remains as a per-child
ceiling, but the shared budget is the hard session-wide limit.
A child stops at whichever comes first.

											
										
										
											2026-03-07 08:16:37 -08:00
+								        while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
-												feat: filesystem checkpoints and /rollback command

Automatic filesystem snapshots before destructive file operations,
with user-facing rollback.  Inspired by PR #559 (by @alireza78a).

Architecture:
- Shadow git repos at ~/.hermes/checkpoints/{hash}/ via GIT_DIR
- CheckpointManager: take/list/restore, turn-scoped dedup, pruning
- Transparent — the LLM never sees it, no tool schema, no tokens
- Once per turn — only first write_file/patch triggers a snapshot

Integration:
- Config: checkpoints.enabled + checkpoints.max_snapshots
- CLI flag: hermes --checkpoints
- Trigger: run_agent.py _execute_tool_calls() before write_file/patch
- /rollback slash command in CLI + gateway (list, restore by number)
- Pre-rollback snapshot auto-created on restore (undo the undo)

Safety:
- Never blocks file operations — all errors silently logged
- Skips root dir, home dir, dirs >50K files
- Disables gracefully when git not installed
- Shadow repo completely isolated from project git

Tests: 35 new tests, all passing (2798 total suite)
Docs: feature page, config reference, CLI commands reference

											
										
										
											2026-03-10 00:49:15 -07:00
+								            # Reset per-turn checkpoint dedup so each iteration can take one snapshot
 								            self._checkpoint_mgr.new_turn()
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								            # Check for interrupt request (e.g., user sent new message)
 								            if self._interrupt_requested:
 								                interrupted = True
 								                if not self.quiet_mode:
 								                    print(f"\n⚡ Breaking out of tool loop due to interrupt...")
 								                break
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								            api_call_count += 1
-												feat: shared iteration budget across parent + subagents

Subagent tool calls now count toward the same session-wide iteration
limit as the parent agent. Previously, each subagent had its own
independent counter, so a parent with max_iterations=60 could spawn
3 subagents each doing 50 calls = 150 total tool calls unmetered.

Changes:
- IterationBudget: thread-safe shared counter (run_agent.py)
  - consume(): try to use one iteration, returns False if exhausted
  - refund(): give back one iteration (for execute_code turns)
  - Thread-safe via Lock (subagents run in ThreadPoolExecutor)
- Parent creates the budget, children inherit it via delegate_tool.py
- execute_code turns are refunded (don't count against budget)
- Default raised from 60 → 90 to account for shared consumption
- Per-child cap (50) still applies as a safety valve

The per-child max_iterations (default 50) remains as a per-child
ceiling, but the shared budget is the hard session-wide limit.
A child stops at whichever comes first.

											
										
										
											2026-03-07 08:16:37 -08:00
+								            if not self.iteration_budget.consume():
 								                if not self.quiet_mode:
 								                    print(f"\n⚠️  Session iteration budget exhausted ({self.iteration_budget.max_total} total across agent + subagents)")
 								                break
-												feat: introduce skills management features in AIAgent and CLI

- Added skills configuration options in cli-config.yaml.example, including a nudge interval for skill creation reminders.
- Implemented skills guidance in AIAgent to prompt users to save reusable workflows after complex tasks.
- Enhanced skills indexing in the prompt builder to include descriptions from SKILL.md files for better context.
- Updated the agent's behavior to periodically remind users about potential skills during tool-calling iterations.

											
										
										
											2026-02-22 13:28:13 -08:00
-												feat(hooks): introduce event hooks system for lifecycle management

Add a new hooks system allowing users to run custom code at key lifecycle points in the agent's operation. This includes support for events such as `gateway:startup`, `session:start`, `agent:step`, and more. Documentation for creating hooks and available events has been added to `README.md` and a new `hooks.md` file. Additionally, integrate step callbacks in the agent to facilitate hook execution during tool-calling iterations.

											
										
										
											2026-02-28 17:09:26 -08:00
+								            # Fire step_callback for gateway hooks (agent:step event)
 								            if self.step_callback is not None:
 								                try:
 								                    prev_tools = []
 								                    for _m in reversed(messages):
 								                        if _m.get("role") == "assistant" and _m.get("tool_calls"):
 								                            prev_tools = [
 								                                tc["function"]["name"]
 								                                for tc in _m["tool_calls"]
 								                                if isinstance(tc, dict)
 								                            ]
 								                            break
 								                    self.step_callback(api_call_count, prev_tools)
 								                except Exception as _step_err:
 								                    logger.debug("step_callback error (iteration %s): %s", api_call_count, _step_err)
-												feat: enhance AIAgent's tool usage nudges and content handling

- Introduced a method to strip <think> blocks from content, improving text visibility.
- Implemented counters to reset nudge intervals when memory and skill tools are used, enhancing user guidance.
- Captured content from turns with tool calls to provide fallback responses, ensuring continuity in conversation.
- Updated nudge logic to remind users about saving memories and creating skills based on interaction patterns.

											
										
										
											2026-02-22 21:33:28 -08:00
+								            # Track tool-calling iterations for skill nudge.
 								            # Counter resets whenever skill_manage is actually used.
-												feat: introduce skills management features in AIAgent and CLI

- Added skills configuration options in cli-config.yaml.example, including a nudge interval for skill creation reminders.
- Implemented skills guidance in AIAgent to prompt users to save reusable workflows after complex tasks.
- Enhanced skills indexing in the prompt builder to include descriptions from SKILL.md files for better context.
- Updated the agent's behavior to periodically remind users about potential skills during tool-calling iterations.

											
										
										
											2026-02-22 13:28:13 -08:00
+								            if (self._skill_nudge_interval > 0
 								                    and "skill_manage" in self.valid_tool_names):
-												feat: enhance AIAgent's tool usage nudges and content handling

- Introduced a method to strip <think> blocks from content, improving text visibility.
- Implemented counters to reset nudge intervals when memory and skill tools are used, enhancing user guidance.
- Captured content from turns with tool calls to provide fallback responses, ensuring continuity in conversation.
- Updated nudge logic to remind users about saving memories and creating skills based on interaction patterns.

											
										
										
											2026-02-22 21:33:28 -08:00
+								                self._iters_since_skill += 1
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
 								            # Prepare messages for API call
 								            # If we have an ephemeral system prompt, prepend it to the messages
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								            # Note: Reasoning is embedded in content via <think> tags for trajectory storage.
 								            # However, providers like Moonshot AI require a separate 'reasoning_content' field
 								            # on assistant messages with tool_calls. We handle both cases here.
 								            api_messages = []
 								            for msg in messages:
 								                api_msg = msg.copy()
-												fix: rebuild compressed payload before retry

											
										
										
											2026-03-07 15:13:45 -05:00
-												Implement reasoning extraction and enhance assistant message handling

- Added a new method `_extract_reasoning` to extract reasoning content from assistant messages, accommodating multiple formats from various providers.
- Updated message handling to ensure all assistant messages include reasoning content for API compatibility, preserving multi-turn reasoning context.
- Enhanced logging to capture reasoning details for debugging and analysis.
- Modified the TODO.md to reflect changes in planning and task management, emphasizing the need for structured task decomposition and progress tracking.

											
										
										
											2026-02-01 22:48:18 -08:00
+								                # For ALL assistant messages, pass reasoning back to the API
 								                # This ensures multi-turn reasoning context is preserved
 								                if msg.get("role") == "assistant":
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                    reasoning_text = msg.get("reasoning")
 								                    if reasoning_text:
-												Implement reasoning extraction and enhance assistant message handling

- Added a new method `_extract_reasoning` to extract reasoning content from assistant messages, accommodating multiple formats from various providers.
- Updated message handling to ensure all assistant messages include reasoning content for API compatibility, preserving multi-turn reasoning context.
- Enhanced logging to capture reasoning details for debugging and analysis.
- Modified the TODO.md to reflect changes in planning and task management, emphasizing the need for structured task decomposition and progress tracking.

											
										
										
											2026-02-01 22:48:18 -08:00
+								                        # Add reasoning_content for API compatibility (Moonshot AI, Novita, OpenRouter)
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                        api_msg["reasoning_content"] = reasoning_text
-												fix: rebuild compressed payload before retry

											
										
										
											2026-03-07 15:13:45 -05:00
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                # Remove 'reasoning' field - it's for trajectory storage only
-												Implement reasoning extraction and enhance assistant message handling

- Added a new method `_extract_reasoning` to extract reasoning content from assistant messages, accommodating multiple formats from various providers.
- Updated message handling to ensure all assistant messages include reasoning content for API compatibility, preserving multi-turn reasoning context.
- Enhanced logging to capture reasoning details for debugging and analysis.
- Modified the TODO.md to reflect changes in planning and task management, emphasizing the need for structured task decomposition and progress tracking.

											
										
										
											2026-02-01 22:48:18 -08:00
+								                # We've copied it to 'reasoning_content' for the API above
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                if "reasoning" in api_msg:
 								                    api_msg.pop("reasoning")
-												fix(agent): strip finish_reason from assistant messages to fix Mistral 422 errors (#253)

* fix(agent): skip reasoning param for Mistral API to prevent 422 errors

* fix(agent): strip finish_reason from assistant messages to fix Mistral 422 errors
											
										
										
											2026-03-02 11:35:03 +03:00
+								                # Remove finish_reason - not accepted by strict APIs (e.g. Mistral)
 								                if "finish_reason" in api_msg:
 								                    api_msg.pop("finish_reason")
-												Implement reasoning extraction and enhance assistant message handling

- Added a new method `_extract_reasoning` to extract reasoning content from assistant messages, accommodating multiple formats from various providers.
- Updated message handling to ensure all assistant messages include reasoning content for API compatibility, preserving multi-turn reasoning context.
- Enhanced logging to capture reasoning details for debugging and analysis.
- Modified the TODO.md to reflect changes in planning and task management, emphasizing the need for structured task decomposition and progress tracking.

											
										
										
											2026-02-01 22:48:18 -08:00
+								                # Keep 'reasoning_details' - OpenRouter uses this for multi-turn reasoning context
 								                # The signature field helps maintain reasoning continuity
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                api_messages.append(api_msg)
-												fix: rebuild compressed payload before retry

											
										
										
											2026-03-07 15:13:45 -05:00
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								            # Build the final system message: cached prompt + ephemeral system prompt.
 								            # The ephemeral part is appended here (not baked into the cached prompt)
 								            # so it stays out of the session DB and logs.
-												fix: stabilize system prompt across gateway turns for cache hits

Two changes to prevent unnecessary Anthropic prompt cache misses in the
gateway, where a fresh AIAgent is created per user message:

1. Reuse stored system prompt for continuing sessions:
   When conversation_history is non-empty, load the system prompt from
   the session DB instead of rebuilding from disk. The model already has
   updated memory in its conversation history (it wrote it!), so
   re-reading memory from disk produces a different system prompt that
   breaks the cache prefix.

2. Stabilize Honcho context per session:
   - Only prefetch Honcho context on the first turn (empty history)
   - Bake Honcho context into the cached system prompt and store to DB
   - Remove the per-turn Honcho injection from the API call loop

   This ensures the system message is identical across all turns in a
   session. Previously, re-fetching Honcho could return different context
   on each turn, changing the system message and invalidating the cache.

Both changes preserve the existing behavior for compression (which
invalidates the prompt and rebuilds from scratch) and for the CLI
(where the same AIAgent persists and the cached prompt is already
stable across turns).

Tests: 2556 passed (6 new)

											
										
										
											2026-03-09 01:50:58 -07:00
+								            # Note: Honcho context is baked into _cached_system_prompt on the first
 								            # turn and stored in the session DB, so it does NOT need to be injected
 								            # here.  This keeps the system message identical across all turns in a
 								            # session, maximizing Anthropic prompt cache hits.
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								            effective_system = active_system_prompt or ""
 								            if self.ephemeral_system_prompt:
 								                effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
 								            if effective_system:
 								                api_messages = [{"role": "system", "content": effective_system}] + api_messages
-												fix: rebuild compressed payload before retry

											
										
										
											2026-03-07 15:13:45 -05:00
-												feat: add ephemeral prefill messages and system prompt loading

- Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent.
- Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time.
- Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience.
- Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence.

											
										
										
											2026-02-23 23:55:42 -08:00
+								            # Inject ephemeral prefill messages right after the system prompt
 								            # but before conversation history. Same API-call-time-only pattern.
 								            if self.prefill_messages:
 								                sys_offset = 1 if effective_system else 0
 								                for idx, pfm in enumerate(self.prefill_messages):
 								                    api_messages.insert(sys_offset + idx, pfm.copy())
-												fix: rebuild compressed payload before retry

											
										
										
											2026-03-07 15:13:45 -05:00
-												Implement Anthropic prompt caching for Claude models via OpenRouter

- Introduced a caching strategy that reduces input token costs by ~75% on multi-turn conversations by caching the conversation prefix.
- Added functions to apply cache control markers to messages, enhancing efficiency in token usage.
- Updated AIAgent to auto-enable prompt caching for Claude models, with configurable cache TTL.
- Enhanced logging to track cache hit statistics when caching is active, improving monitoring of token usage.

											
										
										
											2026-02-10 06:49:41 +00:00
+								            # Apply Anthropic prompt caching for Claude models via OpenRouter.
 								            # Auto-detected: if model name contains "claude" and base_url is OpenRouter,
 								            # inject cache_control breakpoints (system + last 3 messages) to reduce
 								            # input token costs by ~75% on multi-turn conversations.
 								            if self._use_prompt_caching:
 								                api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl)
-												fix: rebuild compressed payload before retry

											
										
										
											2026-03-07 15:13:45 -05:00
-												fix: sanitize orphaned tool-call/result pairs in message compression

Enhance message compression by adding a method to clean up orphaned tool-call and tool-result pairs. This ensures that the API receives well-formed messages, preventing errors related to mismatched IDs. The new functionality removes orphaned tool results and adds stub results for tool calls that are missing one, improving overall message integrity during compression.

											
										
										
											2026-03-07 08:08:00 -08:00
+								            # Safety net: strip orphaned tool results / add stubs for missing
 								            # results before sending to the API.  The compressor handles this
 								            # during compression, but orphans can also sneak in from session
 								            # loading or manual message manipulation.
 								            if hasattr(self, 'context_compressor') and self.context_compressor:
 								                api_messages = self.context_compressor._sanitize_tool_pairs(api_messages)
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
+								            # Calculate approximate request size for logging
 								            total_chars = sum(len(str(msg)) for msg in api_messages)
 								            approx_tokens = total_chars // 4  # Rough estimate: 4 chars per token
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								            # Thinking spinner for quiet mode (animated during API call)
 								            thinking_spinner = None
 								            if not self.quiet_mode:
 								                print(f"\n{self.log_prefix}🔄 Making API call #{api_call_count}/{self.max_iterations}...")
 								                print(f"{self.log_prefix}   📊 Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)")
 								                print(f"{self.log_prefix}   🔧 Available tools: {len(self.tools) if self.tools else 0}")
 								            else:
 								                # Animated thinking spinner in quiet mode
 								                face = random.choice(KawaiiSpinner.KAWAII_THINKING)
 								                verb = random.choice(KawaiiSpinner.THINKING_VERBS)
-												fix: stop terminal border flashing with steady cursor and TUI spinner widget

Cherry-picked and improved from PR #470 (fixes #464).

Problem: On Ubuntu 24.04 with ghostty + tmux, the prompt input box
border lines flash due to cursor blink and raw spinner terminal writes
conflicting with prompt_toolkit's rendering.

Changes:
- cli.py: Add CursorShape.BLOCK to Application() to disable cursor blink
- cli.py: Add thinking_callback + spinner_widget in TUI layout so
  thinking status displays as a proper prompt_toolkit widget instead of
  raw terminal writes that conflict with the TUI renderer
- run_agent.py: Add thinking_callback parameter to AIAgent; when set,
  uses the callback instead of KawaiiSpinner for thinking display

What was NOT changed (preserving existing behavior):
- agent/display.py: Untouched. KawaiiSpinner _write() stdout capture,
  _animate() logic, and 0.12s frame interval all preserved. This
  protects subagent stdout redirection and keeps smooth animations
  for non-CLI contexts (gateway, batch runner).
- Original emoji spinner types (brain/sparkle/pulse/moon/star) preserved
  for all non-CLI contexts.

Fixes from original PR #470:
- CursorShape.STEADY_BLOCK -> CursorShape.BLOCK (STEADY_BLOCK doesn't
  exist in prompt_toolkit 3.0.52)
- Removed duplicate self._spinner_text = '' line
- Removed redundant nested if-checks

Tested: 2706 tests pass, interactive CLI verified via tmux.

											
										
										
											2026-03-09 23:26:43 -07:00
+								                if self.thinking_callback:
 								                    # CLI TUI mode: use prompt_toolkit widget instead of raw spinner
 								                    self.thinking_callback(f"{face} {verb}...")
 								                else:
 								                    spinner_type = random.choice(['brain', 'sparkle', 'pulse', 'moon', 'star'])
 								                    thinking_spinner = KawaiiSpinner(f"{face} {verb}...", spinner_type=spinner_type)
 								                    thinking_spinner.start()
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								            # Log request details if verbose
 								            if self.verbose_logging:
 								                logging.debug(f"API Request - Model: {self.model}, Messages: {len(messages)}, Tools: {len(self.tools) if self.tools else 0}")
 								                logging.debug(f"Last message role: {messages[-1]['role'] if messages else 'none'}")
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
+								                logging.debug(f"Total message size: ~{approx_tokens:,} tokens")
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
 								            api_start_time = time.time()
 								            retry_count = 0
-												fix: reduce max_retries to 3 and make ValueError/TypeError non-retryable

- max_retries reduced from 6 to 3 — 6 retries with exponential backoff
  could stall for ~275s total on persistent errors
- ValueError and TypeError now detected as non-retryable client errors
  and abort immediately instead of being retried with backoff (these are
  local validation/programming errors that will never succeed on retry)

											
										
										
											2026-03-11 07:04:46 -07:00
+								            max_retries = 3
-												fix: enhance payload and context compression handling

Added logic to manage multiple compression attempts for large payloads and context length errors. Introduced limits on compression attempts to prevent infinite retries, with appropriate logging and error handling. This ensures better resilience and user feedback when facing compression issues during API calls.

											
										
										
											2026-03-07 09:19:07 -08:00
+								            compression_attempts = 0
 								            max_compression_attempts = 3
-												Harden Codex auth refresh and responses compatibility

											
										
										
											2026-02-25 19:27:54 -08:00
+								            codex_auth_retry_attempted = False
-												fix: implement Nous credential refresh on 401 error for retry logic

											
										
										
											2026-03-07 13:34:23 +11:00
+								            nous_auth_retry_attempted = False
-												fix: rebuild compressed payload before retry

											
										
										
											2026-03-07 15:13:45 -05:00
+								            restart_with_compressed_messages = False
-												fix: continue after output-length truncation

											
										
										
											2026-03-07 18:45:17 -05:00
+								            restart_with_length_continuation = False
-												rate limits

											
										
										
											2025-11-17 18:35:36 -05:00
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								            finish_reason = "stop"
-												fix: enhance payload and context compression handling

Added logic to manage multiple compression attempts for large payloads and context length errors. Introduced limits on compression attempts to prevent infinite retries, with appropriate logging and error handling. This ensures better resilience and user feedback when facing compression issues during API calls.

											
										
										
											2026-03-07 09:19:07 -08:00
+								            response = None  # Guard against UnboundLocalError if all retries fail
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
-												Cherry-pick 6 bug fixes from PR #76 and update documentation

Code fixes (run_agent.py):
- Fix off-by-one in _flush_messages_to_session_db skipping one message per flush
- Add clear_interrupt() to 3 early-return paths preventing stale interrupt state
- Wrap handle_function_call in try/except so tool crashes don't kill the conversation
- Replace fragile `is` identity check with _flush_sentinel marker for memory flush cleanup
- Fix retry loop off-by-one (6 attempts not 7)
- Remove redundant inline `import re`

											
										
										
											2026-02-27 03:21:49 -08:00
+								            while retry_count < max_retries:
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								                try:
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                    api_kwargs = self._build_api_kwargs(api_messages)
-												Harden Codex auth refresh and responses compatibility

											
										
										
											2026-02-25 19:27:54 -08:00
+								                    if self.api_mode == "codex_responses":
 								                        api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False)
-												feat: add multi-provider authentication and inference provider selection

- Implemented a multi-provider authentication system for the Hermes Agent, supporting OAuth for Nous Portal and traditional API key methods for OpenRouter and custom endpoints.
- Enhanced CLI with commands for logging in and out of providers, allowing users to authenticate and manage their credentials easily.
- Updated configuration options to select inference providers, with detailed documentation on usage and setup.
- Improved status reporting to include authentication status and provider details, enhancing user awareness of their current configuration.
- Added new files for authentication handling and updated existing components to integrate the new provider system.

											
										
										
											2026-02-20 17:24:00 -08:00
 								                    if os.getenv("HERMES_DUMP_REQUESTS", "").strip().lower() in {"1", "true", "yes", "on"}:
 								                        self._dump_api_request_debug(api_kwargs, reason="preflight")
-												feat: implement interactive prompts for sudo password and command approval in CLI

- Added methods for handling sudo password and dangerous command approval prompts using a callback mechanism in cli.py.
- Integrated these prompts with the prompt_toolkit UI for improved user experience.
- Updated terminal_tool.py to support callback registration for interactive prompts, enhancing the CLI's interactivity.
- Introduced a background thread for API calls in run_agent.py to allow for interrupt handling during long-running operations.
- Enhanced error handling for interrupted API calls, ensuring graceful degradation of user experience.

											
										
										
											2026-02-21 12:15:40 -08:00
+								                    response = self._interruptible_api_call(api_kwargs)
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
 								                    api_duration = time.time() - api_start_time
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
-												refactor: streamline thinking spinner behavior in AIAgent

- Updated the logic for stopping the thinking spinner to improve clarity in tool execution messages.
- Removed unnecessary checks for tool calls, simplifying the spinner's stop behavior while maintaining informative output for users.

											
										
										
											2026-02-19 01:56:04 -08:00
+								                    # Stop thinking spinner silently -- the response box or tool
 								                    # execution messages that follow are more informative.
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								                    if thinking_spinner:
-												refactor: streamline thinking spinner behavior in AIAgent

- Updated the logic for stopping the thinking spinner to improve clarity in tool execution messages.
- Removed unnecessary checks for tool calls, simplifying the spinner's stop behavior while maintaining informative output for users.

											
										
										
											2026-02-19 01:56:04 -08:00
+								                        thinking_spinner.stop("")
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								                        thinking_spinner = None
-												fix: stop terminal border flashing with steady cursor and TUI spinner widget

Cherry-picked and improved from PR #470 (fixes #464).

Problem: On Ubuntu 24.04 with ghostty + tmux, the prompt input box
border lines flash due to cursor blink and raw spinner terminal writes
conflicting with prompt_toolkit's rendering.

Changes:
- cli.py: Add CursorShape.BLOCK to Application() to disable cursor blink
- cli.py: Add thinking_callback + spinner_widget in TUI layout so
  thinking status displays as a proper prompt_toolkit widget instead of
  raw terminal writes that conflict with the TUI renderer
- run_agent.py: Add thinking_callback parameter to AIAgent; when set,
  uses the callback instead of KawaiiSpinner for thinking display

What was NOT changed (preserving existing behavior):
- agent/display.py: Untouched. KawaiiSpinner _write() stdout capture,
  _animate() logic, and 0.12s frame interval all preserved. This
  protects subagent stdout redirection and keeps smooth animations
  for non-CLI contexts (gateway, batch runner).
- Original emoji spinner types (brain/sparkle/pulse/moon/star) preserved
  for all non-CLI contexts.

Fixes from original PR #470:
- CursorShape.STEADY_BLOCK -> CursorShape.BLOCK (STEADY_BLOCK doesn't
  exist in prompt_toolkit 3.0.52)
- Removed duplicate self._spinner_text = '' line
- Removed redundant nested if-checks

Tested: 2706 tests pass, interactive CLI verified via tmux.

											
										
										
											2026-03-09 23:26:43 -07:00
+								                    if self.thinking_callback:
 								                        self.thinking_callback("")
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
 								                    if not self.quiet_mode:
 								                        print(f"{self.log_prefix}⏱️  API call completed in {api_duration:.2f}s")
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
 								                    if self.verbose_logging:
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                        # Log response with provider info if available
 								                        resp_model = getattr(response, 'model', 'N/A') if response else 'N/A'
 								                        logging.debug(f"API Response received - Model: {resp_model}, Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                    # Validate response shape before proceeding
 								                    response_invalid = False
 								                    error_details = []
 								                    if self.api_mode == "codex_responses":
 								                        output_items = getattr(response, "output", None) if response is not None else None
 								                        if response is None:
 								                            response_invalid = True
 								                            error_details.append("response is None")
 								                        elif not isinstance(output_items, list):
 								                            response_invalid = True
 								                            error_details.append("response.output is not a list")
 								                        elif len(output_items) == 0:
 								                            response_invalid = True
 								                            error_details.append("response.output is empty")
 								                    else:
 								                        if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0:
 								                            response_invalid = True
 								                            if response is None:
 								                                error_details.append("response is None")
 								                            elif not hasattr(response, 'choices'):
 								                                error_details.append("response has no 'choices' attribute")
 								                            elif response.choices is None:
 								                                error_details.append("response.choices is None")
 								                            else:
 								                                error_details.append("response.choices is empty")
 								                    if response_invalid:
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								                        # Stop spinner before printing error messages
 								                        if thinking_spinner:
 								                            thinking_spinner.stop(f"(´;ω;`) oops, retrying...")
 								                            thinking_spinner = None
-												fix: stop terminal border flashing with steady cursor and TUI spinner widget

Cherry-picked and improved from PR #470 (fixes #464).

Problem: On Ubuntu 24.04 with ghostty + tmux, the prompt input box
border lines flash due to cursor blink and raw spinner terminal writes
conflicting with prompt_toolkit's rendering.

Changes:
- cli.py: Add CursorShape.BLOCK to Application() to disable cursor blink
- cli.py: Add thinking_callback + spinner_widget in TUI layout so
  thinking status displays as a proper prompt_toolkit widget instead of
  raw terminal writes that conflict with the TUI renderer
- run_agent.py: Add thinking_callback parameter to AIAgent; when set,
  uses the callback instead of KawaiiSpinner for thinking display

What was NOT changed (preserving existing behavior):
- agent/display.py: Untouched. KawaiiSpinner _write() stdout capture,
  _animate() logic, and 0.12s frame interval all preserved. This
  protects subagent stdout redirection and keeps smooth animations
  for non-CLI contexts (gateway, batch runner).
- Original emoji spinner types (brain/sparkle/pulse/moon/star) preserved
  for all non-CLI contexts.

Fixes from original PR #470:
- CursorShape.STEADY_BLOCK -> CursorShape.BLOCK (STEADY_BLOCK doesn't
  exist in prompt_toolkit 3.0.52)
- Removed duplicate self._spinner_text = '' line
- Removed redundant nested if-checks

Tested: 2706 tests pass, interactive CLI verified via tmux.

											
										
										
											2026-03-09 23:26:43 -07:00
+								                        if self.thinking_callback:
 								                            self.thinking_callback("")
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                        # This is often rate limiting or provider returning malformed response
 								                        retry_count += 1
 								                        # Check for error field in response (some providers include this)
 								                        error_msg = "Unknown"
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                        provider_name = "Unknown"
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                        if response and hasattr(response, 'error') and response.error:
 								                            error_msg = str(response.error)
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                            # Try to extract provider from error metadata
 								                            if hasattr(response.error, 'metadata') and response.error.metadata:
 								                                provider_name = response.error.metadata.get('provider_name', 'Unknown')
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                        elif response and hasattr(response, 'message') and response.message:
 								                            error_msg = str(response.message)
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                        # Try to get provider from model field (OpenRouter often returns actual model used)
 								                        if provider_name == "Unknown" and response and hasattr(response, 'model') and response.model:
 								                            provider_name = f"model={response.model}"
 								                        # Check for x-openrouter-provider or similar metadata
 								                        if provider_name == "Unknown" and response:
 								                            # Log all response attributes for debugging
 								                            resp_attrs = {k: str(v)[:100] for k, v in vars(response).items() if not k.startswith('_')}
 								                            if self.verbose_logging:
 								                                logging.debug(f"Response attributes for invalid response: {resp_attrs}")
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                        print(f"{self.log_prefix}⚠️  Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}")
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                        print(f"{self.log_prefix}   🏢 Provider: {provider_name}")
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                        print(f"{self.log_prefix}   📝 Provider message: {error_msg[:200]}")
 								                        print(f"{self.log_prefix}   ⏱️  Response time: {api_duration:.2f}s (fast response often indicates rate limiting)")
-												fix: correct off-by-one in retry exhaustion checks

The retry exhaustion checks used > instead of >= to compare
retry_count against max_retries. Since the while loop condition is
retry_count < max_retries, the check retry_count > max_retries can
never be true inside the loop. When retries are exhausted, the loop
exits and falls through to response.choices[0] on an invalid response,
crashing with IndexError instead of returning a proper error.

											
										
										
											2026-03-01 02:27:26 +03:00
+								                        if retry_count >= max_retries:
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								                            # Try fallback before giving up
 								                            if self._try_activate_fallback():
 								                                retry_count = 0
 								                                continue
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                            print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
 								                            logging.error(f"{self.log_prefix}Invalid API response after {max_retries} retries.")
-												Fix session saving to persist the full conversation history to the DB (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                            self._persist_session(messages, conversation_history)
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                            return {
 								                                "messages": messages,
 								                                "completed": False,
 								                                "api_calls": api_call_count,
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                                "error": "Invalid API response shape. Likely rate limited or malformed provider response.",
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                                "failed": True  # Mark as failure for filtering
 								                            }
 								                        # Longer backoff for rate limiting (likely cause of None choices)
 								                        wait_time = min(5 * (2 ** (retry_count - 1)), 120)  # 5s, 10s, 20s, 40s, 80s, 120s
 								                        print(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...")
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                        logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
-												Enhance BatchRunner and AIAgent with new configuration options; the default model is now Opus 4.6 and the default summarizer is Gemini Flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
 								                        # Sleep in small increments to stay responsive to interrupts
 								                        sleep_end = time.time() + wait_time
 								                        while time.time() < sleep_end:
 								                            if self._interrupt_requested:
 								                                print(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
-												Fix session saving to persist the full conversation history to the DB (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                                self._persist_session(messages, conversation_history)
-												Cherry-pick 6 bug fixes from PR #76 and update documentation

Code fixes (run_agent.py):
- Fix off-by-one in _flush_messages_to_session_db skipping one message per flush
- Add clear_interrupt() to 3 early-return paths preventing stale interrupt state
- Wrap handle_function_call in try/except so tool crashes don't kill the conversation
- Replace fragile `is` identity check with _flush_sentinel marker for memory flush cleanup
- Fix retry loop off-by-one (6 attempts not 7)
- Remove redundant inline `import re`

											
										
										
											2026-02-27 03:21:49 -08:00
+								                                self.clear_interrupt()
-												Enhance BatchRunner and AIAgent with new configuration options; the default model is now Opus 4.6 and the default summarizer is Gemini Flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								                                return {
-												fix: add context to interruption messages for model awareness

When the agent is interrupted, the model now receives descriptive
context instead of a generic 'Operation interrupted.' string:

- Tool skip messages include the tool name:
  '[Tool execution cancelled — terminal was skipped due to user interrupt]'
  '[Tool execution skipped — web_search was not started. User sent a new message]'

- API call interrupts include timing:
  'Operation interrupted: waiting for model response (4.2s elapsed).'

- Retry/error interrupts include retry context:
  'Operation interrupted: retrying API call after rate limit (retry 2/5).'
  'Operation interrupted: handling API error (Timeout: connection timed out).'

This helps the model understand what was happening when it was
interrupted, reducing wasted iterations spent re-discovering state.

											
										
										
											2026-03-08 18:58:23 -07:00
+								                                    "final_response": f"Operation interrupted: retrying API call after rate limit (retry {retry_count}/{max_retries}).",
-												Enhance BatchRunner and AIAgent with new configuration options; the default model is now Opus 4.6 and the default summarizer is Gemini Flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								                                    "messages": messages,
 								                                    "api_calls": api_call_count,
 								                                    "completed": False,
 								                                    "interrupted": True,
 								                                }
 								                            time.sleep(0.2)
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                        continue  # Retry the API call
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								                    # Check finish_reason before proceeding
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                    if self.api_mode == "codex_responses":
 								                        status = getattr(response, "status", None)
 								                        incomplete_details = getattr(response, "incomplete_details", None)
 								                        incomplete_reason = None
 								                        if isinstance(incomplete_details, dict):
 								                            incomplete_reason = incomplete_details.get("reason")
 								                        else:
 								                            incomplete_reason = getattr(incomplete_details, "reason", None)
 								                        if status == "incomplete" and incomplete_reason in {"max_output_tokens", "length"}:
 								                            finish_reason = "length"
 								                        else:
 								                            finish_reason = "stop"
 								                    else:
 								                        finish_reason = response.choices[0].finish_reason
-												fix: continue after output-length truncation

											
										
										
											2026-03-07 18:45:17 -05:00
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								                    if finish_reason == "length":
 								                        print(f"{self.log_prefix}⚠️  Response truncated (finish_reason='length') - model hit max output tokens")
-												fix: continue after output-length truncation

											
										
										
											2026-03-07 18:45:17 -05:00
 								                        if self.api_mode == "chat_completions":
 								                            assistant_message = response.choices[0].message
 								                            if not assistant_message.tool_calls:
 								                                length_continue_retries += 1
 								                                interim_msg = self._build_assistant_message(assistant_message, finish_reason)
 								                                messages.append(interim_msg)
 								                                if assistant_message.content:
 								                                    truncated_response_prefix += assistant_message.content
 								                                if length_continue_retries < 3:
 								                                    print(
 								                                        f"{self.log_prefix}↻ Requesting continuation "
 								                                        f"({length_continue_retries}/3)..."
 								                                    )
 								                                    continue_msg = {
 								                                        "role": "user",
 								                                        "content": (
 								                                            "[System: Your previous response was truncated by the output "
 								                                            "length limit. Continue exactly where you left off. Do not "
 								                                            "restart or repeat prior text. Finish the answer directly.]"
 								                                        ),
 								                                    }
 								                                    messages.append(continue_msg)
 								                                    self._session_messages = messages
 								                                    self._save_session_log(messages)
 								                                    restart_with_length_continuation = True
 								                                    break
 								                                partial_response = self._strip_think_blocks(truncated_response_prefix).strip()
 								                                self._cleanup_task_resources(effective_task_id)
 								                                self._persist_session(messages, conversation_history)
 								                                return {
 								                                    "final_response": partial_response or None,
 								                                    "messages": messages,
 								                                    "api_calls": api_call_count,
 								                                    "completed": False,
 								                                    "partial": True,
 								                                    "error": "Response remained truncated after 3 continuation attempts",
 								                                }
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								                        # If we have prior messages, roll back to last complete state
 								                        if len(messages) > 1:
 								                            print(f"{self.log_prefix}   ⏪ Rolling back to last complete assistant turn")
 								                            rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
-												fix: continue after output-length truncation

											
										
										
											2026-03-07 18:45:17 -05:00
-												Cleanup time!

											
										
										
											2026-02-20 23:23:32 -08:00
+								                            self._cleanup_task_resources(effective_task_id)
-												Fix session saving to persist the full conversation history to the DB (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                            self._persist_session(messages, conversation_history)
-												fix: continue after output-length truncation

											
										
										
											2026-03-07 18:45:17 -05:00
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								                            return {
 								                                "final_response": None,
 								                                "messages": rolled_back_messages,
 								                                "api_calls": api_call_count,
 								                                "completed": False,
 								                                "partial": True,
 								                                "error": "Response truncated due to output length limit"
 								                            }
 								                        else:
 								                            # First message was truncated - mark as failed
 								                            print(f"{self.log_prefix}❌ First response truncated - cannot recover")
-												Fix session saving to persist the full conversation history to the DB (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                            self._persist_session(messages, conversation_history)
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								                            return {
 								                                "final_response": None,
 								                                "messages": messages,
 								                                "api_calls": api_call_count,
 								                                "completed": False,
 								                                "failed": True,
 								                                "error": "First response truncated due to output length limit"
 								                            }
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								                    # Track actual token usage from response for context management
 								                    if hasattr(response, 'usage') and response.usage:
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                        if self.api_mode == "codex_responses":
 								                            prompt_tokens = getattr(response.usage, 'input_tokens', 0) or 0
 								                            completion_tokens = getattr(response.usage, 'output_tokens', 0) or 0
 								                            total_tokens = (
 								                                getattr(response.usage, 'total_tokens', None)
 								                                or (prompt_tokens + completion_tokens)
 								                            )
 								                        else:
 								                            prompt_tokens = getattr(response.usage, 'prompt_tokens', 0) or 0
 								                            completion_tokens = getattr(response.usage, 'completion_tokens', 0) or 0
 								                            total_tokens = getattr(response.usage, 'total_tokens', 0) or 0
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								                        usage_dict = {
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                            "prompt_tokens": prompt_tokens,
 								                            "completion_tokens": completion_tokens,
 								                            "total_tokens": total_tokens,
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced the `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								                        }
 								                        self.context_compressor.update_from_response(usage_dict)
-												feat(cli): add /usage command to display session token usage

Introduced a new command "/usage" in the CLI to show cumulative token usage for the current session. This includes details on prompt tokens, completion tokens, total tokens, API calls, and context state. Updated command documentation to reflect this addition. Enhanced the AIAgent class to track token usage throughout the session.

											
										
										
											2026-03-01 00:23:19 -08:00
-												feat: smart context length probing with persistent caching + banner display

Replaces the unsafe 128K fallback for unknown models with a descending
probe strategy (2M → 1M → 512K → 200K → 128K → 64K → 32K). When a
context-length error occurs, the agent steps down tiers and retries.
The discovered limit is cached per model+provider combo in
~/.hermes/context_length_cache.yaml so subsequent sessions skip probing.

Also parses API error messages to extract the actual context limit
(e.g. 'maximum context length is 32768 tokens') for instant resolution.

The CLI banner now displays the context window size next to the model
name (e.g. 'claude-opus-4 · 200K context · Nous Research').

Changes:
- agent/model_metadata.py: CONTEXT_PROBE_TIERS, persistent cache
  (save/load/get), parse_context_limit_from_error(), get_next_probe_tier()
- agent/context_compressor.py: accepts base_url, passes to metadata
- run_agent.py: step-down logic in context error handler, caches on success
- cli.py + hermes_cli/banner.py: context length in welcome banner
- tests: 22 new tests for probing, parsing, and caching

Addresses #132. PR #319's approach (8K default) rejected — too conservative.

											
										
										
											2026-03-05 16:09:57 -08:00
+								                        # Cache discovered context length after successful call
 								                        if self.context_compressor._context_probed:
 								                            ctx = self.context_compressor.context_length
 								                            save_context_length(self.model, self.base_url, ctx)
 								                            print(f"{self.log_prefix}💾 Cached context length: {ctx:,} tokens for {self.model}")
 								                            self.context_compressor._context_probed = False
-												feat(cli): add /usage command to display session token usage

Introduced a new command "/usage" in the CLI to show cumulative token usage for the current session. This includes details on prompt tokens, completion tokens, total tokens, API calls, and context state. Updated command documentation to reflect this addition. Enhanced the AIAgent class to track token usage throughout the session.

											
										
										
											2026-03-01 00:23:19 -08:00
+								                        self.session_prompt_tokens += prompt_tokens
 								                        self.session_completion_tokens += completion_tokens
 								                        self.session_total_tokens += total_tokens
 								                        self.session_api_calls += 1
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
 								                        if self.verbose_logging:
 								                            logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}")
-												Implement Anthropic prompt caching for Claude models via OpenRouter

- Introduced a caching strategy that reduces input token costs by ~75% on multi-turn conversations by caching the conversation prefix.
- Added functions to apply cache control markers to messages, enhancing efficiency in token usage.
- Updated AIAgent to auto-enable prompt caching for Claude models, with configurable cache TTL.
- Enhanced logging to track cache hit statistics when caching is active, improving monitoring of token usage.

											
										
										
											2026-02-10 06:49:41 +00:00
 								                        # Log cache hit stats when prompt caching is active
 								                        if self._use_prompt_caching:
 								                            details = getattr(response.usage, 'prompt_tokens_details', None)
 								                            cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0
 								                            written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0
 								                            prompt = usage_dict["prompt_tokens"]
 								                            hit_pct = (cached / prompt * 100) if prompt > 0 else 0
 								                            if not self.quiet_mode:
 								                                print(f"{self.log_prefix}   💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								                    break  # Success, exit retry loop
-												rate limits

											
										
										
											2025-11-17 18:35:36 -05:00
-												feat: implement interactive prompts for sudo password and command approval in CLI

- Added methods for handling sudo password and dangerous command approval prompts using a callback mechanism in cli.py.
- Integrated these prompts with the prompt_toolkit UI for improved user experience.
- Updated terminal_tool.py to support callback registration for interactive prompts, enhancing the CLI's interactivity.
- Introduced a background thread for API calls in run_agent.py to allow for interrupt handling during long-running operations.
- Enhanced error handling for interrupted API calls, ensuring graceful degradation of user experience.

											
										
										
											2026-02-21 12:15:40 -08:00
+								                except InterruptedError:
 								                    if thinking_spinner:
 								                        thinking_spinner.stop("")
 								                        thinking_spinner = None
-												fix: stop terminal border flashing with steady cursor and TUI spinner widget

Cherry-picked and improved from PR #470 (fixes #464).

Problem: On Ubuntu 24.04 with ghostty + tmux, the prompt input box
border lines flash due to cursor blink and raw spinner terminal writes
conflicting with prompt_toolkit's rendering.

Changes:
- cli.py: Add CursorShape.BLOCK to Application() to disable cursor blink
- cli.py: Add thinking_callback + spinner_widget in TUI layout so
  thinking status displays as a proper prompt_toolkit widget instead of
  raw terminal writes that conflict with the TUI renderer
- run_agent.py: Add thinking_callback parameter to AIAgent; when set,
  uses the callback instead of KawaiiSpinner for thinking display

What was NOT changed (preserving existing behavior):
- agent/display.py: Untouched. KawaiiSpinner _write() stdout capture,
  _animate() logic, and 0.12s frame interval all preserved. This
  protects subagent stdout redirection and keeps smooth animations
  for non-CLI contexts (gateway, batch runner).
- Original emoji spinner types (brain/sparkle/pulse/moon/star) preserved
  for all non-CLI contexts.

Fixes from original PR #470:
- CursorShape.STEADY_BLOCK -> CursorShape.BLOCK (STEADY_BLOCK doesn't
  exist in prompt_toolkit 3.0.52)
- Removed duplicate self._spinner_text = '' line
- Removed redundant nested if-checks

Tested: 2706 tests pass, interactive CLI verified via tmux.

											
										
										
											2026-03-09 23:26:43 -07:00
+								                    if self.thinking_callback:
 								                        self.thinking_callback("")
-												fix: add context to interruption messages for model awareness

When the agent is interrupted, the model now receives descriptive
context instead of a generic 'Operation interrupted.' string:

- Tool skip messages include the tool name:
  '[Tool execution cancelled — terminal was skipped due to user interrupt]'
  '[Tool execution skipped — web_search was not started. User sent a new message]'

- API call interrupts include timing:
  'Operation interrupted: waiting for model response (4.2s elapsed).'

- Retry/error interrupts include retry context:
  'Operation interrupted: retrying API call after rate limit (retry 2/5).'
  'Operation interrupted: handling API error (Timeout: connection timed out).'

This helps the model understand what was happening when it was
interrupted, reducing wasted iterations spent re-discovering state.

											
										
										
											2026-03-08 18:58:23 -07:00
+								                    api_elapsed = time.time() - api_start_time
-												feat: implement interactive prompts for sudo password and command approval in CLI

- Added methods for handling sudo password and dangerous command approval prompts using a callback mechanism in cli.py.
- Integrated these prompts with the prompt_toolkit UI for improved user experience.
- Updated terminal_tool.py to support callback registration for interactive prompts, enhancing the CLI's interactivity.
- Introduced a background thread for API calls in run_agent.py to allow for interrupt handling during long-running operations.
- Enhanced error handling for interrupted API calls, ensuring graceful degradation of user experience.

											
										
										
											2026-02-21 12:15:40 -08:00
+								                    print(f"{self.log_prefix}⚡ Interrupted during API call.")
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                    self._persist_session(messages, conversation_history)
-												feat: implement interactive prompts for sudo password and command approval in CLI

- Added methods for handling sudo password and dangerous command approval prompts using a callback mechanism in cli.py.
- Integrated these prompts with the prompt_toolkit UI for improved user experience.
- Updated terminal_tool.py to support callback registration for interactive prompts, enhancing the CLI's interactivity.
- Introduced a background thread for API calls in run_agent.py to allow for interrupt handling during long-running operations.
- Enhanced error handling for interrupted API calls, ensuring graceful degradation of user experience.

											
										
										
											2026-02-21 12:15:40 -08:00
+								                    interrupted = True
-												fix: add context to interruption messages for model awareness

When the agent is interrupted, the model now receives descriptive
context instead of a generic 'Operation interrupted.' string:

- Tool skip messages include the tool name:
  '[Tool execution cancelled — terminal was skipped due to user interrupt]'
  '[Tool execution skipped — web_search was not started. User sent a new message]'

- API call interrupts include timing:
  'Operation interrupted: waiting for model response (4.2s elapsed).'

- Retry/error interrupts include retry context:
  'Operation interrupted: retrying API call after rate limit (retry 2/5).'
  'Operation interrupted: handling API error (Timeout: connection timed out).'

This helps the model understand what was happening when it was
interrupted, reducing wasted iterations spent re-discovering state.

											
										
										
											2026-03-08 18:58:23 -07:00
+								                    final_response = f"Operation interrupted: waiting for model response ({api_elapsed:.1f}s elapsed)."
-												feat: implement interactive prompts for sudo password and command approval in CLI

- Added methods for handling sudo password and dangerous command approval prompts using a callback mechanism in cli.py.
- Integrated these prompts with the prompt_toolkit UI for improved user experience.
- Updated terminal_tool.py to support callback registration for interactive prompts, enhancing the CLI's interactivity.
- Introduced a background thread for API calls in run_agent.py to allow for interrupt handling during long-running operations.
- Enhanced error handling for interrupted API calls, ensuring graceful degradation of user experience.

											
										
										
											2026-02-21 12:15:40 -08:00
+								                    break
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								                except Exception as api_error:
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								                    # Stop spinner before printing error messages
 								                    if thinking_spinner:
 								                        thinking_spinner.stop(f"(╥_╥) error, retrying...")
 								                        thinking_spinner = None
-												fix: stop terminal border flashing with steady cursor and TUI spinner widget

Cherry-picked and improved from PR #470 (fixes #464).

Problem: On Ubuntu 24.04 with ghostty + tmux, the prompt input box
border lines flash due to cursor blink and raw spinner terminal writes
conflicting with prompt_toolkit's rendering.

Changes:
- cli.py: Add CursorShape.BLOCK to Application() to disable cursor blink
- cli.py: Add thinking_callback + spinner_widget in TUI layout so
  thinking status displays as a proper prompt_toolkit widget instead of
  raw terminal writes that conflict with the TUI renderer
- run_agent.py: Add thinking_callback parameter to AIAgent; when set,
  uses the callback instead of KawaiiSpinner for thinking display

What was NOT changed (preserving existing behavior):
- agent/display.py: Untouched. KawaiiSpinner _write() stdout capture,
  _animate() logic, and 0.12s frame interval all preserved. This
  protects subagent stdout redirection and keeps smooth animations
  for non-CLI contexts (gateway, batch runner).
- Original emoji spinner types (brain/sparkle/pulse/moon/star) preserved
  for all non-CLI contexts.

Fixes from original PR #470:
- CursorShape.STEADY_BLOCK -> CursorShape.BLOCK (STEADY_BLOCK doesn't
  exist in prompt_toolkit 3.0.52)
- Removed duplicate self._spinner_text = '' line
- Removed redundant nested if-checks

Tested: 2706 tests pass, interactive CLI verified via tmux.

											
										
										
											2026-03-09 23:26:43 -07:00
+								                    if self.thinking_callback:
 								                        self.thinking_callback("")
-												Harden Codex auth refresh and responses compatibility

											
										
										
											2026-02-25 19:27:54 -08:00
 								                    status_code = getattr(api_error, "status_code", None)
 								                    if (
 								                        self.api_mode == "codex_responses"
 								                        and self.provider == "openai-codex"
 								                        and status_code == 401
 								                        and not codex_auth_retry_attempted
 								                    ):
 								                        codex_auth_retry_attempted = True
 								                        if self._try_refresh_codex_client_credentials(force=True):
 								                            print(f"{self.log_prefix}🔐 Codex auth refreshed after 401. Retrying request...")
 								                            continue
-												fix: implement Nous credential refresh on 401 error for retry logic

											
										
										
											2026-03-07 13:34:23 +11:00
+								                    if (
 								                        self.api_mode == "chat_completions"
 								                        and self.provider == "nous"
 								                        and status_code == 401
 								                        and not nous_auth_retry_attempted
 								                    ):
 								                        nous_auth_retry_attempted = True
 								                        if self._try_refresh_nous_client_credentials(force=True):
 								                            print(f"{self.log_prefix}🔐 Nous agent key refreshed after 401. Retrying request...")
 								                            continue
-												feat(agent): enhance reasoning handling and configuration

Added support for processing encrypted reasoning content within the AIAgent class. Introduced logic to determine reasoning effort and enable/disable reasoning based on configuration settings. Updated the kwargs to reflect these changes, ensuring proper handling of reasoning parameters during agent execution.

											
										
										
											2026-03-01 16:15:20 -08:00
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								                    retry_count += 1
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
+								                    elapsed_time = time.time() - api_start_time
 								                    # Enhanced error logging
 								                    error_type = type(api_error).__name__
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                    error_msg = str(api_error).lower()
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
 								                    print(f"{self.log_prefix}⚠️  API call failed (attempt {retry_count}/{max_retries}): {error_type}")
 								                    print(f"{self.log_prefix}   ⏱️  Time elapsed before failure: {elapsed_time:.2f}s")
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                    print(f"{self.log_prefix}   📝 Error: {str(api_error)[:200]}")
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
+								                    print(f"{self.log_prefix}   📊 Request context: {len(api_messages)} messages, ~{approx_tokens:,} tokens, {len(self.tools) if self.tools else 0} tools")
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								                    # Check for interrupt before deciding to retry
 								                    if self._interrupt_requested:
 								                        print(f"{self.log_prefix}⚡ Interrupt detected during error handling, aborting retries.")
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                        self._persist_session(messages, conversation_history)
-												Cherry-pick 6 bug fixes from PR #76 and update documentation

Code fixes (run_agent.py):
- Fix off-by-one in _flush_messages_to_session_db skipping one message per flush
- Add clear_interrupt() to 3 early-return paths preventing stale interrupt state
- Wrap handle_function_call in try/except so tool crashes don't kill the conversation
- Replace fragile `is` identity check with _flush_sentinel marker for memory flush cleanup
- Fix retry loop off-by-one (6 attempts not 7)
- Remove redundant inline `import re`

											
										
										
											2026-02-27 03:21:49 -08:00
+								                        self.clear_interrupt()
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								                        return {
-												fix: add context to interruption messages for model awareness

When the agent is interrupted, the model now receives descriptive
context instead of a generic 'Operation interrupted.' string:

- Tool skip messages include the tool name:
  '[Tool execution cancelled — terminal was skipped due to user interrupt]'
  '[Tool execution skipped — web_search was not started. User sent a new message]'

- API call interrupts include timing:
  'Operation interrupted: waiting for model response (4.2s elapsed).'

- Retry/error interrupts include retry context:
  'Operation interrupted: retrying API call after rate limit (retry 2/5).'
  'Operation interrupted: handling API error (Timeout: connection timed out).'

This helps the model understand what was happening when it was
interrupted, reducing wasted iterations spent re-discovering state.

											
										
										
											2026-03-08 18:58:23 -07:00
+								                            "final_response": f"Operation interrupted: handling API error ({error_type}: {str(api_error)[:80]}).",
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								                            "messages": messages,
 								                            "api_calls": api_call_count,
 								                            "completed": False,
 								                            "interrupted": True,
 								                        }
-												fix(agent): handle 413 payload-too-large via compression instead of aborting

The 413 "Request Entity Too Large" error from the LLM API was caught by the
generic 4xx handler which aborts immediately. This is wrong for 413 — it's a
payload-size issue that can be resolved by compressing conversation history.

- Intercept 413 before the generic 4xx block and route to _compress_context
- Exclude 413 from generic is_client_error detection
- Add 'request entity too large' to context-length phrases as safety net
- Add tests for 413 compression behavior

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-27 12:21:27 -05:00
+								                    # Check for 413 payload-too-large BEFORE generic 4xx handler.
 								                    # A 413 is a payload-size error — the correct response is to
 								                    # compress history and retry, not abort immediately.
 								                    status_code = getattr(api_error, "status_code", None)
 								                    is_payload_too_large = (
 								                        status_code == 413
 								                        or 'request entity too large' in error_msg
-												fix(agent): enhance 413 error handling and improve conversation history management in tests

											
										
										
											2026-02-27 23:04:32 -08:00
+								                        or 'payload too large' in error_msg
-												fix(agent): handle 413 payload-too-large via compression instead of aborting

The 413 "Request Entity Too Large" error from the LLM API was caught by the
generic 4xx handler which aborts immediately. This is wrong for 413 — it's a
payload-size issue that can be resolved by compressing conversation history.

- Intercept 413 before the generic 4xx block and route to _compress_context
- Exclude 413 from generic is_client_error detection
- Add 'request entity too large' to context-length phrases as safety net
- Add tests for 413 compression behavior

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-27 12:21:27 -05:00
+								                        or 'error code: 413' in error_msg
 								                    )
 								                    if is_payload_too_large:
-												fix: enhance payload and context compression handling

Added logic to manage multiple compression attempts for large payloads and context length errors. Introduced limits on compression attempts to prevent infinite retries, with appropriate logging and error handling. This ensures better resilience and user feedback when facing compression issues during API calls.

											
										
										
											2026-03-07 09:19:07 -08:00
+								                        compression_attempts += 1
 								                        if compression_attempts > max_compression_attempts:
 								                            print(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.")
 								                            logging.error(f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
 								                            self._persist_session(messages, conversation_history)
 								                            return {
 								                                "messages": messages,
 								                                "completed": False,
 								                                "api_calls": api_call_count,
 								                                "error": f"Request payload too large: max compression attempts ({max_compression_attempts}) reached.",
 								                                "partial": True
 								                            }
 								                        print(f"{self.log_prefix}⚠️  Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")
-												fix(agent): handle 413 payload-too-large via compression instead of aborting

The 413 "Request Entity Too Large" error from the LLM API was caught by the
generic 4xx handler which aborts immediately. This is wrong for 413 — it's a
payload-size issue that can be resolved by compressing conversation history.

- Intercept 413 before the generic 4xx block and route to _compress_context
- Exclude 413 from generic is_client_error detection
- Add 'request entity too large' to context-length phrases as safety net
- Add tests for 413 compression behavior

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-27 12:21:27 -05:00
 								                        original_len = len(messages)
 								                        messages, active_system_prompt = self._compress_context(
-												fix: detect and warn on file re-read loops after context compression

When context compression summarizes conversation history, the agent
loses track of which files it already read and re-reads them in a loop.
Users report the agent reading the same files endlessly without writing.

Root cause: context compression is lossy — file contents and read history
are lost in the summary. After compression, the model thinks it hasn't
examined the files yet and reads them again.

Fix (two-part):
1. Track file reads per task in file_tools.py. When the same file region
   is read again, include a _warning in the response telling the model
   to stop re-reading and use existing information.
2. After context compression, inject a structured message listing all
   files already read in the session with explicit "do NOT re-read"
   instruction, preserving read history across compression boundaries.

Adds 16 tests covering warning detection, task isolation, summary
accuracy, tracker cleanup, and compression history injection.

											
										
										
											2026-03-08 20:44:42 +03:00
+								                            messages, system_message, approx_tokens=approx_tokens,
 								                            task_id=effective_task_id,
-												fix(agent): handle 413 payload-too-large via compression instead of aborting

The 413 "Request Entity Too Large" error from the LLM API was caught by the
generic 4xx handler which aborts immediately. This is wrong for 413 — it's a
payload-size issue that can be resolved by compressing conversation history.

- Intercept 413 before the generic 4xx block and route to _compress_context
- Exclude 413 from generic is_client_error detection
- Add 'request entity too large' to context-length phrases as safety net
- Add tests for 413 compression behavior

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-27 12:21:27 -05:00
+								                        )
 								                        if len(messages) < original_len:
 								                            print(f"{self.log_prefix}   🗜️  Compressed {original_len} → {len(messages)} messages, retrying...")
-												fix: enhance payload and context compression handling

Added logic to manage multiple compression attempts for large payloads and context length errors. Introduced limits on compression attempts to prevent infinite retries, with appropriate logging and error handling. This ensures better resilience and user feedback when facing compression issues during API calls.

											
										
										
											2026-03-07 09:19:07 -08:00
+								                            time.sleep(2)  # Brief pause between compression retries
-												fix: rebuild compressed payload before retry

											
										
										
											2026-03-07 15:13:45 -05:00
+								                            restart_with_compressed_messages = True
 								                            break
-												fix(agent): handle 413 payload-too-large via compression instead of aborting

The 413 "Request Entity Too Large" error from the LLM API was caught by the
generic 4xx handler which aborts immediately. This is wrong for 413 — it's a
payload-size issue that can be resolved by compressing conversation history.

- Intercept 413 before the generic 4xx block and route to _compress_context
- Exclude 413 from generic is_client_error detection
- Add 'request entity too large' to context-length phrases as safety net
- Add tests for 413 compression behavior

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-27 12:21:27 -05:00
+								                        else:
 								                            print(f"{self.log_prefix}❌ Payload too large and cannot compress further.")
 								                            logging.error(f"{self.log_prefix}413 payload too large. Cannot compress further.")
 								                            self._persist_session(messages, conversation_history)
 								                            return {
 								                                "messages": messages,
 								                                "completed": False,
 								                                "api_calls": api_call_count,
 								                                "error": "Request payload too large (413). Cannot compress further.",
 								                                "partial": True
 								                            }
-												fix: preflight context compression + error handler ordering for model switches

Two fixes for the case where a user switches to a model with a smaller
context window while having a large existing session:

1. Preflight compression in run_conversation(): Before the main loop,
   estimate tokens of loaded history + system prompt. If it exceeds the
   model's compression threshold (85% of context), compress proactively
   with up to 3 passes. This naturally handles model switches because
   the gateway creates a fresh AIAgent per message with the current
   model's context length.

2. Error handler reordering: Context-length errors (400 with 'maximum
   context length' etc.) are now checked BEFORE the generic 4xx handler.
   Previously, OpenRouter's 400-status context-length errors were caught
   as non-retryable client errors and aborted immediately, never reaching
   the compression+retry logic.

Reported by Sonicrida on Discord: 840-message session (2MB+) crashed
after switching from a large-context model to minimax via OpenRouter.

											
										
										
											2026-03-04 14:42:41 -08:00
+								                    # Check for context-length errors BEFORE generic 4xx handler.
-												Fix context overrun crash with local LLM backends (fixes #348)

Local backends (LM Studio, Ollama, llama.cpp) return HTTP 400
with messages like "Context size has been exceeded" when the
context window is full. The error phrase list did not include
"context size" or "context window", so these errors fell through
to the generic 4xx abort handler instead of triggering compression.

Changes:
- Move context-length check above generic 4xx handler so it runs
  first (same pattern as the existing 413 check)
- Add "context size" and "context window" to the phrase list
- Guard 4xx handler with `not is_context_length_error` to prevent
  context-related 400s from being treated as non-retryable

											
										
										
											2026-03-05 01:12:34 +03:00
+								                    # Local backends (LM Studio, Ollama, llama.cpp) often return
 								                    # HTTP 400 with messages like "Context size has been exceeded"
 								                    # which must trigger compression, not an immediate abort.
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                    is_context_length_error = any(phrase in error_msg for phrase in [
-												Fix context overrun crash with local LLM backends (fixes #348)

Local backends (LM Studio, Ollama, llama.cpp) return HTTP 400
with messages like "Context size has been exceeded" when the
context window is full. The error phrase list did not include
"context size" or "context window", so these errors fell through
to the generic 4xx abort handler instead of triggering compression.

Changes:
- Move context-length check above generic 4xx handler so it runs
  first (same pattern as the existing 413 check)
- Add "context size" and "context window" to the phrase list
- Guard 4xx handler with `not is_context_length_error` to prevent
  context-related 400s from being treated as non-retryable

											
										
										
											2026-03-05 01:12:34 +03:00
+								                        'context length', 'context size', 'maximum context',
 								                        'token limit', 'too many tokens', 'reduce the length',
 								                        'exceeds the limit', 'context window',
-												fix(agent): handle 413 payload-too-large via compression instead of aborting

The 413 "Request Entity Too Large" error from the LLM API was caught by the
generic 4xx handler which aborts immediately. This is wrong for 413 — it's a
payload-size issue that can be resolved by compressing conversation history.

- Intercept 413 before the generic 4xx block and route to _compress_context
- Exclude 413 from generic is_client_error detection
- Add 'request entity too large' to context-length phrases as safety net
- Add tests for 413 compression behavior

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-27 12:21:27 -05:00
+								                        'request entity too large',  # OpenRouter/Nous 413 safety net
-												fix: proactive compression after large tool results + Anthropic error detection

Two fixes for context overflow handling:

1. Proactive compression after tool execution: The compression check now
   estimates the next prompt size using real token counts from the last API
   response (prompt_tokens + completion_tokens) plus a conservative estimate
   of newly appended tool results (chars // 3 for JSON-heavy content).
   Previously, should_compress() only checked last_prompt_tokens which
   didn't account for tool results — so a 130k prompt + 100k chars of tool
   output would pass the 140k threshold check but fail the 200k API limit.

2. Safety net: Added 'prompt is too long' to context-length error detection
   phrases. Anthropic returns 'prompt is too long: N tokens > M maximum'
   on HTTP 400, which wasn't matched by existing phrases. This ensures
   compression fires even if the proactive check underestimates.

Fixes #813

											
										
										
											2026-03-11 08:04:52 -07:00
+								                        'prompt is too long',  # Anthropic: "prompt is too long: N tokens > M maximum"
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                    ])
 								                    if is_context_length_error:
-												feat: smart context length probing with persistent caching + banner display

Replaces the unsafe 128K fallback for unknown models with a descending
probe strategy (2M → 1M → 512K → 200K → 128K → 64K → 32K). When a
context-length error occurs, the agent steps down tiers and retries.
The discovered limit is cached per model+provider combo in
~/.hermes/context_length_cache.yaml so subsequent sessions skip probing.

Also parses API error messages to extract the actual context limit
(e.g. 'maximum context length is 32768 tokens') for instant resolution.

The CLI banner now displays the context window size next to the model
name (e.g. 'claude-opus-4 · 200K context · Nous Research').

Changes:
- agent/model_metadata.py: CONTEXT_PROBE_TIERS, persistent cache
  (save/load/get), parse_context_limit_from_error(), get_next_probe_tier()
- agent/context_compressor.py: accepts base_url, passes to metadata
- run_agent.py: step-down logic in context error handler, caches on success
- cli.py + hermes_cli/banner.py: context length in welcome banner
- tests: 22 new tests for probing, parsing, and caching

Addresses #132. PR #319's approach (8K default) rejected — too conservative.

											
										
										
											2026-03-05 16:09:57 -08:00
+								                        compressor = self.context_compressor
 								                        old_ctx = compressor.context_length
 								                        # Try to parse the actual limit from the error message
 								                        parsed_limit = parse_context_limit_from_error(error_msg)
 								                        if parsed_limit and parsed_limit < old_ctx:
 								                            new_ctx = parsed_limit
 								                            print(f"{self.log_prefix}⚠️  Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})")
 								                        else:
 								                            # Step down to the next probe tier
 								                            new_ctx = get_next_probe_tier(old_ctx)
 								                        if new_ctx and new_ctx < old_ctx:
 								                            compressor.context_length = new_ctx
 								                            compressor.threshold_tokens = int(new_ctx * compressor.threshold_percent)
 								                            compressor._context_probed = True
 								                            print(f"{self.log_prefix}⚠️  Context length exceeded — stepping down: {old_ctx:,} → {new_ctx:,} tokens")
 								                        else:
 								                            print(f"{self.log_prefix}⚠️  Context length exceeded at minimum tier — attempting compression...")
-												Fix context overrun crash with local LLM backends (fixes #348)

Local backends (LM Studio, Ollama, llama.cpp) return HTTP 400
with messages like "Context size has been exceeded" when the
context window is full. The error phrase list did not include
"context size" or "context window", so these errors fell through
to the generic 4xx abort handler instead of triggering compression.

Changes:
- Move context-length check above generic 4xx handler so it runs
  first (same pattern as the existing 413 check)
- Add "context size" and "context window" to the phrase list
- Guard 4xx handler with `not is_context_length_error` to prevent
  context-related 400s from being treated as non-retryable

											
										
										
											2026-03-05 01:12:34 +03:00
-												fix: enhance payload and context compression handling

Added logic to manage multiple compression attempts for large payloads and context length errors. Introduced limits on compression attempts to prevent infinite retries, with appropriate logging and error handling. This ensures better resilience and user feedback when facing compression issues during API calls.

											
										
										
											2026-03-07 09:19:07 -08:00
+								                        compression_attempts += 1
 								                        if compression_attempts > max_compression_attempts:
 								                            print(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.")
 								                            logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
 								                            self._persist_session(messages, conversation_history)
 								                            return {
 								                                "messages": messages,
 								                                "completed": False,
 								                                "api_calls": api_call_count,
 								                                "error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
 								                                "partial": True
 								                            }
 								                        print(f"{self.log_prefix}   🗜️  Context compression attempt {compression_attempts}/{max_compression_attempts}...")
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								                        original_len = len(messages)
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                        messages, active_system_prompt = self._compress_context(
-												fix: detect and warn on file re-read loops after context compression

When context compression summarizes conversation history, the agent
loses track of which files it already read and re-reads them in a loop.
Users report the agent reading the same files endlessly without writing.

Root cause: context compression is lossy — file contents and read history
are lost in the summary. After compression, the model thinks it hasn't
examined the files yet and reads them again.

Fix (two-part):
1. Track file reads per task in file_tools.py. When the same file region
   is read again, include a _warning in the response telling the model
   to stop re-reading and use existing information.
2. After context compression, inject a structured message listing all
   files already read in the session with explicit "do NOT re-read"
   instruction, preserving read history across compression boundaries.

Adds 16 tests covering warning detection, task isolation, summary
accuracy, tracker cleanup, and compression history injection.

											
										
										
											2026-03-08 20:44:42 +03:00
+								                            messages, system_message, approx_tokens=approx_tokens,
 								                            task_id=effective_task_id,
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                        )
-												Fix context overrun crash with local LLM backends (fixes #348)

Local backends (LM Studio, Ollama, llama.cpp) return HTTP 400
with messages like "Context size has been exceeded" when the
context window is full. The error phrase list did not include
"context size" or "context window", so these errors fell through
to the generic 4xx abort handler instead of triggering compression.

Changes:
- Move context-length check above generic 4xx handler so it runs
  first (same pattern as the existing 413 check)
- Add "context size" and "context window" to the phrase list
- Guard 4xx handler with `not is_context_length_error` to prevent
  context-related 400s from being treated as non-retryable

											
										
										
											2026-03-05 01:12:34 +03:00
-												feat: smart context length probing with persistent caching + banner display

Replaces the unsafe 128K fallback for unknown models with a descending
probe strategy (2M → 1M → 512K → 200K → 128K → 64K → 32K). When a
context-length error occurs, the agent steps down tiers and retries.
The discovered limit is cached per model+provider combo in
~/.hermes/context_length_cache.yaml so subsequent sessions skip probing.

Also parses API error messages to extract the actual context limit
(e.g. 'maximum context length is 32768 tokens') for instant resolution.

The CLI banner now displays the context window size next to the model
name (e.g. 'claude-opus-4 · 200K context · Nous Research').

Changes:
- agent/model_metadata.py: CONTEXT_PROBE_TIERS, persistent cache
  (save/load/get), parse_context_limit_from_error(), get_next_probe_tier()
- agent/context_compressor.py: accepts base_url, passes to metadata
- run_agent.py: step-down logic in context error handler, caches on success
- cli.py + hermes_cli/banner.py: context length in welcome banner
- tests: 22 new tests for probing, parsing, and caching

Addresses #132. PR #319's approach (8K default) rejected — too conservative.

											
										
										
											2026-03-05 16:09:57 -08:00
+								                        if len(messages) < original_len or new_ctx and new_ctx < old_ctx:
 								                            if len(messages) < original_len:
 								                                print(f"{self.log_prefix}   🗜️  Compressed {original_len} → {len(messages)} messages, retrying...")
-												fix: enhance payload and context compression handling

Added logic to manage multiple compression attempts for large payloads and context length errors. Introduced limits on compression attempts to prevent infinite retries, with appropriate logging and error handling. This ensures better resilience and user feedback when facing compression issues during API calls.

											
										
										
											2026-03-07 09:19:07 -08:00
+								                            time.sleep(2)  # Brief pause between compression retries
-												fix: rebuild compressed payload before retry

											
										
										
											2026-03-07 15:13:45 -05:00
+								                            restart_with_compressed_messages = True
 								                            break
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								                        else:
-												feat: smart context length probing with persistent caching + banner display

Replaces the unsafe 128K fallback for unknown models with a descending
probe strategy (2M → 1M → 512K → 200K → 128K → 64K → 32K). When a
context-length error occurs, the agent steps down tiers and retries.
The discovered limit is cached per model+provider combo in
~/.hermes/context_length_cache.yaml so subsequent sessions skip probing.

Also parses API error messages to extract the actual context limit
(e.g. 'maximum context length is 32768 tokens') for instant resolution.

The CLI banner now displays the context window size next to the model
name (e.g. 'claude-opus-4 · 200K context · Nous Research').

Changes:
- agent/model_metadata.py: CONTEXT_PROBE_TIERS, persistent cache
  (save/load/get), parse_context_limit_from_error(), get_next_probe_tier()
- agent/context_compressor.py: accepts base_url, passes to metadata
- run_agent.py: step-down logic in context error handler, caches on success
- cli.py + hermes_cli/banner.py: context length in welcome banner
- tests: 22 new tests for probing, parsing, and caching

Addresses #132. PR #319's approach (8K default) rejected — too conservative.

											
										
										
											2026-03-05 16:09:57 -08:00
+								                            # Can't compress further and already at minimum tier
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								                            print(f"{self.log_prefix}❌ Context length exceeded and cannot compress further.")
 								                            print(f"{self.log_prefix}   💡 The conversation has accumulated too much content.")
 								                            logging.error(f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.")
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                            self._persist_session(messages, conversation_history)
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								                            return {
 								                                "messages": messages,
 								                                "completed": False,
 								                                "api_calls": api_call_count,
 								                                "error": f"Context length exceeded ({approx_tokens:,} tokens). Cannot compress further.",
 								                                "partial": True
 								                            }
-												fix: preflight context compression + error handler ordering for model switches

Two fixes for the case where a user switches to a model with a smaller
context window while having a large existing session:

1. Preflight compression in run_conversation(): Before the main loop,
   estimate tokens of loaded history + system prompt. If it exceeds the
   model's compression threshold (85% of context), compress proactively
   with up to 3 passes. This naturally handles model switches because
   the gateway creates a fresh AIAgent per message with the current
   model's context length.

2. Error handler reordering: Context-length errors (400 with 'maximum
   context length' etc.) are now checked BEFORE the generic 4xx handler.
   Previously, OpenRouter's 400-status context-length errors were caught
   as non-retryable client errors and aborted immediately, never reaching
   the compression+retry logic.

Reported by Sonicrida on Discord: 840-message session (2MB+) crashed
after switching from a large-context model to minimax via OpenRouter.

											
										
										
											2026-03-04 14:42:41 -08:00
 								                    # Check for non-retryable client errors (4xx HTTP status codes).
 								                    # These indicate a problem with the request itself (bad model ID,
 								                    # invalid API key, forbidden, etc.) and will never succeed on retry.
-												Fix context overrun crash with local LLM backends (fixes #348)

Local backends (LM Studio, Ollama, llama.cpp) return HTTP 400
with messages like "Context size has been exceeded" when the
context window is full. The error phrase list did not include
"context size" or "context window", so these errors fell through
to the generic 4xx abort handler instead of triggering compression.

Changes:
- Move context-length check above generic 4xx handler so it runs
  first (same pattern as the existing 413 check)
- Add "context size" and "context window" to the phrase list
- Guard 4xx handler with `not is_context_length_error` to prevent
  context-related 400s from being treated as non-retryable

											
										
										
											2026-03-05 01:12:34 +03:00
+								                    # Note: 413 and context-length errors are excluded — handled above.
-												fix: reduce max_retries to 3 and make ValueError/TypeError non-retryable

- max_retries reduced from 6 to 3 — 6 retries with exponential backoff
  could stall for ~275s total on persistent errors
- ValueError and TypeError now detected as non-retryable client errors
  and abort immediately instead of being retried with backoff (these are
  local validation/programming errors that will never succeed on retry)

											
										
										
											2026-03-11 07:04:46 -07:00
+								                    # Also catch local validation errors (ValueError, TypeError) — these
 								                    # are programming bugs, not transient failures.
 								                    is_local_validation_error = isinstance(api_error, (ValueError, TypeError))
-												fix: preflight context compression + error handler ordering for model switches

Two fixes for the case where a user switches to a model with a smaller
context window while having a large existing session:

1. Preflight compression in run_conversation(): Before the main loop,
   estimate tokens of loaded history + system prompt. If it exceeds the
   model's compression threshold (85% of context), compress proactively
   with up to 3 passes. This naturally handles model switches because
   the gateway creates a fresh AIAgent per message with the current
   model's context length.

2. Error handler reordering: Context-length errors (400 with 'maximum
   context length' etc.) are now checked BEFORE the generic 4xx handler.
   Previously, OpenRouter's 400-status context-length errors were caught
   as non-retryable client errors and aborted immediately, never reaching
   the compression+retry logic.

Reported by Sonicrida on Discord: 840-message session (2MB+) crashed
after switching from a large-context model to minimax via OpenRouter.

											
										
										
											2026-03-04 14:42:41 -08:00
+								                    is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413
-												fix: reduce max_retries to 3 and make ValueError/TypeError non-retryable

- max_retries reduced from 6 to 3 — 6 retries with exponential backoff
  could stall for ~275s total on persistent errors
- ValueError and TypeError now detected as non-retryable client errors
  and abort immediately instead of being retried with backoff (these are
  local validation/programming errors that will never succeed on retry)

											
										
										
											2026-03-11 07:04:46 -07:00
+								                    is_client_error = (is_local_validation_error or is_client_status_error or any(phrase in error_msg for phrase in [
-												Fix context overrun crash with local LLM backends (fixes #348)

Local backends (LM Studio, Ollama, llama.cpp) return HTTP 400
with messages like "Context size has been exceeded" when the
context window is full. The error phrase list did not include
"context size" or "context window", so these errors fell through
to the generic 4xx abort handler instead of triggering compression.

Changes:
- Move context-length check above generic 4xx handler so it runs
  first (same pattern as the existing 413 check)
- Add "context size" and "context window" to the phrase list
- Guard 4xx handler with `not is_context_length_error` to prevent
  context-related 400s from being treated as non-retryable

											
										
										
											2026-03-05 01:12:34 +03:00
+								                        'error code: 401', 'error code: 403',
-												fix: preflight context compression + error handler ordering for model switches

Two fixes for the case where a user switches to a model with a smaller
context window while having a large existing session:

1. Preflight compression in run_conversation(): Before the main loop,
   estimate tokens of loaded history + system prompt. If it exceeds the
   model's compression threshold (85% of context), compress proactively
   with up to 3 passes. This naturally handles model switches because
   the gateway creates a fresh AIAgent per message with the current
   model's context length.

2. Error handler reordering: Context-length errors (400 with 'maximum
   context length' etc.) are now checked BEFORE the generic 4xx handler.
   Previously, OpenRouter's 400-status context-length errors were caught
   as non-retryable client errors and aborted immediately, never reaching
   the compression+retry logic.

Reported by Sonicrida on Discord: 840-message session (2MB+) crashed
after switching from a large-context model to minimax via OpenRouter.

											
										
										
											2026-03-04 14:42:41 -08:00
+								                        'error code: 404', 'error code: 422',
 								                        'is not a valid model', 'invalid model', 'model not found',
 								                        'invalid api key', 'invalid_api_key', 'authentication',
 								                        'unauthorized', 'forbidden', 'not found',
 								                    ])) and not is_context_length_error
 								                    if is_client_error:
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								                        # Try fallback before aborting — a different provider
 								                        # may not have the same issue (rate limit, auth, etc.)
 								                        if self._try_activate_fallback():
 								                            retry_count = 0
 								                            continue
-												fix: preflight context compression + error handler ordering for model switches

Two fixes for the case where a user switches to a model with a smaller
context window while having a large existing session:

1. Preflight compression in run_conversation(): Before the main loop,
   estimate tokens of loaded history + system prompt. If it exceeds the
   model's compression threshold (85% of context), compress proactively
   with up to 3 passes. This naturally handles model switches because
   the gateway creates a fresh AIAgent per message with the current
   model's context length.

2. Error handler reordering: Context-length errors (400 with 'maximum
   context length' etc.) are now checked BEFORE the generic 4xx handler.
   Previously, OpenRouter's 400-status context-length errors were caught
   as non-retryable client errors and aborted immediately, never reaching
   the compression+retry logic.

Reported by Sonicrida on Discord: 840-message session (2MB+) crashed
after switching from a large-context model to minimax via OpenRouter.

											
										
										
											2026-03-04 14:42:41 -08:00
+								                        self._dump_api_request_debug(
 								                            api_kwargs, reason="non_retryable_client_error", error=api_error,
 								                        )
 								                        print(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.")
 								                        print(f"{self.log_prefix}   💡 This type of error won't be fixed by retrying.")
 								                        logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
 								                        self._persist_session(messages, conversation_history)
 								                        return {
 								                            "final_response": None,
 								                            "messages": messages,
 								                            "api_calls": api_call_count,
 								                            "completed": False,
 								                            "failed": True,
 								                            "error": str(api_error),
 								                        }
-												Fix context overrun crash with local LLM backends (fixes #348)

Local backends (LM Studio, Ollama, llama.cpp) return HTTP 400
with messages like "Context size has been exceeded" when the
context window is full. The error phrase list did not include
"context size" or "context window", so these errors fell through
to the generic 4xx abort handler instead of triggering compression.

Changes:
- Move context-length check above generic 4xx handler so it runs
  first (same pattern as the existing 413 check)
- Add "context size" and "context window" to the phrase list
- Guard 4xx handler with `not is_context_length_error` to prevent
  context-related 400s from being treated as non-retryable

											
										
										
											2026-03-05 01:12:34 +03:00
-												fix: correct off-by-one in retry exhaustion checks

The retry exhaustion checks used > instead of >= to compare
retry_count against max_retries. Since the while loop condition is
retry_count < max_retries, the check retry_count > max_retries can
never be true inside the loop. When retries are exhausted, the loop
exits and falls through to response.choices[0] on an invalid response,
crashing with IndexError instead of returning a proper error.

											
										
										
											2026-03-01 02:27:26 +03:00
+								                    if retry_count >= max_retries:
-												feat: simple fallback model for provider resilience

When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.

Config (in ~/.hermes/config.yaml):

  fallback_model:
    provider: openrouter
    model: anthropic/claude-sonnet-4

Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.

Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
  and invalid response exhaustion

Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).

Addresses #737.

25 new tests, 2492 total passing.

											
										
										
											2026-03-08 20:22:33 -07:00
+								                        # Try fallback before giving up entirely
 								                        if self._try_activate_fallback():
 								                            retry_count = 0
 								                            continue
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
+								                        print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.")
 								                        logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")
 								                        logging.error(f"{self.log_prefix}Request details - Messages: {len(api_messages)}, Approx tokens: {approx_tokens:,}")
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								                        raise api_error
-												rate limits

											
										
										
											2025-11-17 18:35:36 -05:00
 								                    wait_time = min(2 ** retry_count, 60)  # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s, 60s
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								                    logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}")
-												fix(cli): increase max iterations for child agents and extend API call timeout for improved reliability

											
										
										
											2026-02-27 17:35:26 -08:00
+								                    if retry_count >= max_retries:
 								                        print(f"{self.log_prefix}⚠️  API call failed after {retry_count} attempts: {str(api_error)[:100]}")
 								                        print(f"{self.log_prefix}⏳ Final retry in {wait_time}s...")
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
 								                    # Sleep in small increments so we can respond to interrupts quickly
 								                    # instead of blocking the entire wait_time in one sleep() call
 								                    sleep_end = time.time() + wait_time
 								                    while time.time() < sleep_end:
 								                        if self._interrupt_requested:
 								                            print(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                            self._persist_session(messages, conversation_history)
-												Cherry-pick 6 bug fixes from PR #76 and update documentation

Code fixes (run_agent.py):
- Fix off-by-one in _flush_messages_to_session_db skipping one message per flush
- Add clear_interrupt() to 3 early-return paths preventing stale interrupt state
- Wrap handle_function_call in try/except so tool crashes don't kill the conversation
- Replace fragile `is` identity check with _flush_sentinel marker for memory flush cleanup
- Fix retry loop off-by-one (6 attempts not 7)
- Remove redundant inline `import re`

											
										
										
											2026-02-27 03:21:49 -08:00
+								                            self.clear_interrupt()
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								                            return {
-												fix: add context to interruption messages for model awareness

When the agent is interrupted, the model now receives descriptive
context instead of a generic 'Operation interrupted.' string:

- Tool skip messages include the tool name:
  '[Tool execution cancelled — terminal was skipped due to user interrupt]'
  '[Tool execution skipped — web_search was not started. User sent a new message]'

- API call interrupts include timing:
  'Operation interrupted: waiting for model response (4.2s elapsed).'

- Retry/error interrupts include retry context:
  'Operation interrupted: retrying API call after rate limit (retry 2/5).'
  'Operation interrupted: handling API error (Timeout: connection timed out).'

This helps the model understand what was happening when it was
interrupted, reducing wasted iterations spent re-discovering state.

											
										
										
											2026-03-08 18:58:23 -07:00
+								                                "final_response": f"Operation interrupted: retrying API call after error (retry {retry_count}/{max_retries}).",
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								                                "messages": messages,
 								                                "api_calls": api_call_count,
 								                                "completed": False,
 								                                "interrupted": True,
 								                            }
 								                        time.sleep(0.2)  # Check interrupt every 200ms
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
-												feat: implement interactive prompts for sudo password and command approval in CLI

- Added methods for handling sudo password and dangerous command approval prompts using a callback mechanism in cli.py.
- Integrated these prompts with the prompt_toolkit UI for improved user experience.
- Updated terminal_tool.py to support callback registration for interactive prompts, enhancing the CLI's interactivity.
- Introduced a background thread for API calls in run_agent.py to allow for interrupt handling during long-running operations.
- Enhanced error handling for interrupted API calls, ensuring graceful degradation of user experience.

											
										
										
											2026-02-21 12:15:40 -08:00
+								            # If the API call was interrupted, skip response processing
 								            if interrupted:
 								                break
-												fix: rebuild compressed payload before retry

											
										
										
											2026-03-07 15:13:45 -05:00
+								            if restart_with_compressed_messages:
 								                api_call_count -= 1
 								                self.iteration_budget.refund()
 								                continue
-												fix: continue after output-length truncation

											
										
										
											2026-03-07 18:45:17 -05:00
+								            if restart_with_length_continuation:
 								                continue
-												fix: enhance payload and context compression handling

Added logic to manage multiple compression attempts for large payloads and context length errors. Introduced limits on compression attempts to prevent infinite retries, with appropriate logging and error handling. This ensures better resilience and user feedback when facing compression issues during API calls.

											
										
										
											2026-03-07 09:19:07 -08:00
+								            # Guard: if all retries exhausted without a successful response
 								            # (e.g. repeated context-length errors that exhausted retry_count),
 								            # the `response` variable is still None. Break out cleanly.
 								            if response is None:
 								                print(f"{self.log_prefix}❌ All API retries exhausted with no successful response.")
 								                self._persist_session(messages, conversation_history)
 								                break
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								            try:
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                if self.api_mode == "codex_responses":
 								                    assistant_message, finish_reason = self._normalize_codex_response(response)
 								                else:
 								                    assistant_message = response.choices[0].message
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
-												fix: handle non-string content from OpenAI-compatible servers (#759)

Some local LLM servers (llama-server, etc.) return message.content as
a dict or list instead of a plain string. This caused AttributeError
'dict object has no attribute strip' on every API call.

Normalizes content to string immediately after receiving the response:
- dict: extracts 'text' or 'content' field, falls back to json.dumps
- list: extracts text parts (OpenAI multimodal content format)
- other: str() conversion

Applied at the single point where response.choices[0].message is read
in the main agent loop, so all downstream .strip()/.startswith()/[:100]
operations work regardless of server implementation.

Closes #759

											
										
										
											2026-03-09 03:32:32 -07:00
+								                # Normalize content to string — some OpenAI-compatible servers
 								                # (llama-server, etc.) return content as a dict or list instead
 								                # of a plain string, which crashes downstream .strip() calls.
 								                if assistant_message.content is not None and not isinstance(assistant_message.content, str):
 								                    raw = assistant_message.content
 								                    if isinstance(raw, dict):
 								                        assistant_message.content = raw.get("text", "") or raw.get("content", "") or json.dumps(raw)
 								                    elif isinstance(raw, list):
 								                        # Multimodal content list — extract text parts
 								                        parts = []
 								                        for part in raw:
 								                            if isinstance(part, str):
 								                                parts.append(part)
 								                            elif isinstance(part, dict) and part.get("type") == "text":
 								                                parts.append(part.get("text", ""))
 								                            elif isinstance(part, dict) and "text" in part:
 								                                parts.append(str(part["text"]))
 								                        assistant_message.content = "\n".join(parts)
 								                    else:
 								                        assistant_message.content = str(raw)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								                # Handle assistant response
-												Add a Claude Code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								                if assistant_message.content and not self.quiet_mode:
-												updates for stability and speed

											
										
										
											2026-01-08 08:57:51 +00:00
+								                    print(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")
-												feat(gateway): expose subagent tool calls and thinking to user (fixes #169) (#186)

When subagents run via delegate_task, the user now sees real-time
progress instead of silence:

CLI: tree-view activity lines print above the delegation spinner
  🔀 Delegating: research quantum computing
     ├─ 💭 "I'll search for papers first..."
     ├─ 🔍 web_search  "quantum computing"
     ├─ 📖 read_file  "paper.pdf"
     └─ ⠹ working... (18.2s)

Gateway (Telegram/Discord): batched progress summaries sent every
5 tool calls to avoid message spam. Remaining tools flushed on
subagent completion.

Changes:
- agent/display.py: add KawaiiSpinner.print_above() to print
  status lines above an active spinner without disrupting animation.
  Uses captured stdout (self._out) so it works inside the child's
  redirect_stdout(devnull).

- tools/delegate_tool.py: add _build_child_progress_callback()
  that creates a per-child callback relaying tool calls and
  thinking events to the parent's spinner (CLI) or progress
  queue (gateway). Each child gets its own callback instance,
  so parallel subagents don't share state. Includes _flush()
  for gateway batch completion.

- run_agent.py: fire tool_progress_callback with '_thinking'
  event when the model produces text content. Guarded by
  _delegate_depth > 0 so only subagents fire this (prevents
  gateway spam from main agent). REASONING_SCRATCHPAD/think/
  reasoning XML tags are stripped before display.

Tests: 21 new tests covering print_above, callback builder,
thinking relay, SCRATCHPAD filtering, batching, flush, thread
isolation, delegate_depth guard, and prefix handling.
											
										
										
											2026-03-01 10:18:00 +03:00
 								                # Notify progress callback of model's thinking (used by subagent
 								                # delegation to relay the child's reasoning to the parent display).
 								                # Guard: only fire for subagents (_delegate_depth >= 1) to avoid
 								                # spamming gateway platforms with the main agent's every thought.
 								                if (assistant_message.content and self.tool_progress_callback
 								                        and getattr(self, '_delegate_depth', 0) > 0):
 								                    _think_text = assistant_message.content.strip()
 								                    # Strip reasoning XML tags that shouldn't leak to parent display
 								                    _think_text = re.sub(
 								                        r'</?(?:REASONING_SCRATCHPAD|think|reasoning)>', '', _think_text
 								                    ).strip()
 								                    first_line = _think_text.split('\n')[0][:80] if _think_text else ""
 								                    if first_line:
 								                        try:
 								                            self.tool_progress_callback("_thinking", first_line)
 								                        except Exception:
 								                            pass
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
-												Refactor BatchRunner and AIAgent for enhanced reasoning and tool management, improved tool definitions for fileops

- Updated `ALL_POSSIBLE_TOOLS` to auto-derive from `TOOL_TO_TOOLSET_MAP` for consistent schema.
- Introduced `_extract_reasoning_stats` function to track reasoning coverage in assistant turns.
- Enhanced `_process_batch_worker` to discard prompts with no reasoning and aggregate reasoning statistics.
- Updated documentation and comments for clarity on new features and changes.

											
										
										
											2026-02-08 20:19:14 +00:00
+								                # Check for incomplete <REASONING_SCRATCHPAD> (opened but never closed)
 								                # This means the model ran out of output tokens mid-reasoning — retry up to 2 times
-												refactor: streamline scratchpad handling in AIAgent

- Removed static methods for converting and checking <REASONING_SCRATCHPAD> tags, simplifying the codebase.
- Replaced calls to the removed methods with direct function calls for better clarity and maintainability.
- Updated trajectory saving logic to utilize a dedicated function for improved organization and readability.

											
										
										
											2026-02-23 09:55:09 -08:00
+								                if has_incomplete_scratchpad(assistant_message.content or ""):
-												Refactor BatchRunner and AIAgent for enhanced reasoning and tool management, improved tool definitions for fileops

- Updated `ALL_POSSIBLE_TOOLS` to auto-derive from `TOOL_TO_TOOLSET_MAP` for consistent schema.
- Introduced `_extract_reasoning_stats` function to track reasoning coverage in assistant turns.
- Enhanced `_process_batch_worker` to discard prompts with no reasoning and aggregate reasoning statistics.
- Updated documentation and comments for clarity on new features and changes.

											
										
										
											2026-02-08 20:19:14 +00:00
+								                    if not hasattr(self, '_incomplete_scratchpad_retries'):
 								                        self._incomplete_scratchpad_retries = 0
 								                    self._incomplete_scratchpad_retries += 1
 								                    print(f"{self.log_prefix}⚠️  Incomplete <REASONING_SCRATCHPAD> detected (opened but never closed)")
 								                    if self._incomplete_scratchpad_retries <= 2:
 								                        print(f"{self.log_prefix}🔄 Retrying API call ({self._incomplete_scratchpad_retries}/2)...")
 								                        # Don't add the broken message, just retry
 								                        continue
 								                    else:
 								                        # Max retries - discard this turn and save as partial
 								                        print(f"{self.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.")
 								                        self._incomplete_scratchpad_retries = 0
 								                        rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
-												Cleanup time!

											
										
										
											2026-02-20 23:23:32 -08:00
+								                        self._cleanup_task_resources(effective_task_id)
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                        self._persist_session(messages, conversation_history)
-												Refactor BatchRunner and AIAgent for enhanced reasoning and tool management, improved tool definitions for fileops

- Updated `ALL_POSSIBLE_TOOLS` to auto-derive from `TOOL_TO_TOOLSET_MAP` for consistent schema.
- Introduced `_extract_reasoning_stats` function to track reasoning coverage in assistant turns.
- Enhanced `_process_batch_worker` to discard prompts with no reasoning and aggregate reasoning statistics.
- Updated documentation and comments for clarity on new features and changes.

											
										
										
											2026-02-08 20:19:14 +00:00
 								                        return {
 								                            "final_response": None,
 								                            "messages": rolled_back_messages,
 								                            "api_calls": api_call_count,
 								                            "completed": False,
 								                            "partial": True,
 								                            "error": "Incomplete REASONING_SCRATCHPAD after 2 retries"
 								                        }
 								                # Reset incomplete scratchpad counter on clean response
 								                if hasattr(self, '_incomplete_scratchpad_retries'):
 								                    self._incomplete_scratchpad_retries = 0
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
 								                if self.api_mode == "codex_responses" and finish_reason == "incomplete":
 								                    if not hasattr(self, "_codex_incomplete_retries"):
 								                        self._codex_incomplete_retries = 0
 								                    self._codex_incomplete_retries += 1
 								                    interim_msg = self._build_assistant_message(assistant_message, finish_reason)
-												fix: handle None message content across codebase (fixes #276)

The OpenAI API returns content: null on assistant messages with tool
calls. msg.get('content', '') returns None when the key exists with
value None, causing TypeError on len(), string concatenation, and
.strip() in downstream code paths.

Fixed 4 locations that process conversation messages:
- agent/auxiliary_client.py:84 — None passed to API calls
- cli.py:1288 — crash on content[:200] and len(content)
- run_agent.py:3444 — crash on None.strip()
- honcho_integration/session.py:445 — 'None' rendered in transcript

13 other instances were verified safe (already protected, only process
user/tool messages, or use the safe pattern).

Pattern: msg.get('content', '') → msg.get('content') or ''

Fixes #276

											
										
										
											2026-03-02 02:23:53 -08:00
+								                    interim_has_content = bool((interim_msg.get("content") or "").strip())
-												Add OpenAI Codex provider runtime and responses integration (without .agent/PLANS.md)

											
										
										
											2026-02-25 18:20:38 -08:00
+								                    interim_has_reasoning = bool(interim_msg.get("reasoning", "").strip()) if isinstance(interim_msg.get("reasoning"), str) else False
 								                    if interim_has_content or interim_has_reasoning:
 								                        last_msg = messages[-1] if messages else None
 								                        duplicate_interim = (
 								                            isinstance(last_msg, dict)
 								                            and last_msg.get("role") == "assistant"
 								                            and last_msg.get("finish_reason") == "incomplete"
 								                            and (last_msg.get("content") or "") == (interim_msg.get("content") or "")
 								                            and (last_msg.get("reasoning") or "") == (interim_msg.get("reasoning") or "")
 								                        )
 								                        if not duplicate_interim:
 								                            messages.append(interim_msg)
 								                    if self._codex_incomplete_retries < 3:
 								                        if not self.quiet_mode:
 								                            print(f"{self.log_prefix}↻ Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)")
 								                        self._session_messages = messages
 								                        self._save_session_log(messages)
 								                        continue
 								                    self._codex_incomplete_retries = 0
 								                    self._persist_session(messages, conversation_history)
 								                    return {
 								                        "final_response": None,
 								                        "messages": messages,
 								                        "api_calls": api_call_count,
 								                        "completed": False,
 								                        "partial": True,
 								                        "error": "Codex response remained incomplete after 3 continuation attempts",
 								                    }
 								                elif hasattr(self, "_codex_incomplete_retries"):
 								                    self._codex_incomplete_retries = 0
-												Refactor BatchRunner and AIAgent for enhanced reasoning and tool management, improved tool definitions for fileops

- Updated `ALL_POSSIBLE_TOOLS` to auto-derive from `TOOL_TO_TOOLSET_MAP` for consistent schema.
- Introduced `_extract_reasoning_stats` function to track reasoning coverage in assistant turns.
- Enhanced `_process_batch_worker` to discard prompts with no reasoning and aggregate reasoning statistics.
- Updated documentation and comments for clarity on new features and changes.

											
										
										
											2026-02-08 20:19:14 +00:00
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								                # Check for tool calls
 								                if assistant_message.tool_calls:
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								                    if not self.quiet_mode:
 								                        print(f"{self.log_prefix}🔧 Processing {len(assistant_message.tool_calls)} tool call(s)...")
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								                    if self.verbose_logging:
 								                        for tc in assistant_message.tool_calls:
 								                            logging.debug(f"Tool call: {tc.function.name} with args: {tc.function.arguments[:200]}...")
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                    # Validate tool call names - detect model hallucinations
-												fix: tool call repair — auto-lowercase, fuzzy match, helpful error on unknown tool (#520)

- Add _repair_tool_call(): tries lowercase, normalize, then fuzzy match (difflib 0.7)
- Replace 3-retry-then-abort with graceful error: model receives helpful message and self-corrects
- Conversation stays alive instead of dying on hallucinated tool names

Closes #520

											
										
										
											2026-03-08 18:39:43 +03:00
+								                    # Repair mismatched tool names before validating
 								                    for tc in assistant_message.tool_calls:
 								                        if tc.function.name not in self.valid_tool_names:
 								                            repaired = self._repair_tool_call(tc.function.name)
 								                            if repaired:
 								                                print(f"{self.log_prefix}🔧 Auto-repaired tool name: '{tc.function.name}' -> '{repaired}'")
 								                                tc.function.name = repaired
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                    invalid_tool_calls = [
-												fix: tool call repair — auto-lowercase, fuzzy match, helpful error on unknown tool (#520)

- Add _repair_tool_call(): tries lowercase, normalize, then fuzzy match (difflib 0.7)
- Replace 3-retry-then-abort with graceful error: model receives helpful message and self-corrects
- Conversation stays alive instead of dying on hallucinated tool names

Closes #520

											
										
										
											2026-03-08 18:39:43 +03:00
+								                        tc.function.name for tc in assistant_message.tool_calls
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                        if tc.function.name not in self.valid_tool_names
 								                    ]
 								                    if invalid_tool_calls:
-												fix: tool call repair — auto-lowercase, fuzzy match, helpful error on unknown tool (#520)

- Add _repair_tool_call(): tries lowercase, normalize, then fuzzy match (difflib 0.7)
- Replace 3-retry-then-abort with graceful error: model receives helpful message and self-corrects
- Conversation stays alive instead of dying on hallucinated tool names

Closes #520

											
										
										
											2026-03-08 18:39:43 +03:00
+								                        # Return helpful error to model — model can self-correct next turn
 								                        available = ", ".join(sorted(self.valid_tool_names))
 								                        invalid_name = invalid_tool_calls[0]
 								                        invalid_preview = invalid_name[:80] + "..." if len(invalid_name) > 80 else invalid_name
 								                        print(f"{self.log_prefix}⚠️  Unknown tool '{invalid_preview}' — sending error to model for self-correction")
 								                        assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
 								                        messages.append(assistant_msg)
 								                        for tc in assistant_message.tool_calls:
 								                            if tc.function.name not in self.valid_tool_names:
 								                                content = f"Tool '{tc.function.name}' does not exist. Available tools: {available}"
 								                            else:
 								                                content = f"Skipped: another tool call in this turn used an invalid name. Please retry this tool call."
 								                            messages.append({
 								                                "role": "tool",
 								                                "tool_call_id": tc.id,
 								                                "content": content,
 								                            })
 								                        continue
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								                    # Reset retry counter on successful tool call validation
 								                    if hasattr(self, '_invalid_tool_retries'):
 								                        self._invalid_tool_retries = 0
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								                    # Validate tool call arguments are valid JSON
-												Update RL tools and enhance configuration management

- Modified `model_tools.py` to update default model IDs and add new RL function `rl_test_inference`.
- Enhanced `README.md` with installation instructions for submodules and updated API key usage.
- Improved `rl_cli.py` to load configuration from `~/.hermes/config.yaml` and set terminal working directory for RL tools.
- Updated `run_agent.py` to handle empty string arguments as empty objects for better JSON validation.
- Refined installation scripts to ensure submodules are cloned and installed correctly, enhancing setup experience.

											
										
										
											2026-02-04 13:57:59 -08:00
+								                    # Handle empty strings as empty objects (common model quirk)
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								                    invalid_json_args = []
 								                    for tc in assistant_message.tool_calls:
-												Update RL tools and enhance configuration management

- Modified `model_tools.py` to update default model IDs and add new RL function `rl_test_inference`.
- Enhanced `README.md` with installation instructions for submodules and updated API key usage.
- Improved `rl_cli.py` to load configuration from `~/.hermes/config.yaml` and set terminal working directory for RL tools.
- Updated `run_agent.py` to handle empty string arguments as empty objects for better JSON validation.
- Refined installation scripts to ensure submodules are cloned and installed correctly, enhancing setup experience.

											
										
										
											2026-02-04 13:57:59 -08:00
+								                        args = tc.function.arguments
 								                        # Treat empty/whitespace strings as empty object
 								                        if not args or not args.strip():
 								                            tc.function.arguments = "{}"
 								                            continue
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								                        try:
-												Update RL tools and enhance configuration management

- Modified `model_tools.py` to update default model IDs and add new RL function `rl_test_inference`.
- Enhanced `README.md` with installation instructions for submodules and updated API key usage.
- Improved `rl_cli.py` to load configuration from `~/.hermes/config.yaml` and set terminal working directory for RL tools.
- Updated `run_agent.py` to handle empty string arguments as empty objects for better JSON validation.
- Refined installation scripts to ensure submodules are cloned and installed correctly, enhancing setup experience.

											
										
										
											2026-02-04 13:57:59 -08:00
+								                            json.loads(args)
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								                        except json.JSONDecodeError as e:
 								                            invalid_json_args.append((tc.function.name, str(e)))
 								                    if invalid_json_args:
 								                        # Track retries for invalid JSON arguments
 								                        self._invalid_json_retries += 1
 								                        tool_name, error_msg = invalid_json_args[0]
 								                        print(f"{self.log_prefix}⚠️  Invalid JSON in tool call arguments for '{tool_name}': {error_msg}")
 								                        if self._invalid_json_retries < 3:
 								                            print(f"{self.log_prefix}🔄 Retrying API call ({self._invalid_json_retries}/3)...")
 								                            # Don't add anything to messages, just retry the API call
 								                            continue
 								                        else:
-												Enhance skills tool to have an arg so it is more reliably called, and error handling in agent

- Updated the `skills_categories` function to include a `verbose` parameter, allowing users to request skill counts per category.
- Modified the `handle_skills_function_call` method to pass the `verbose` argument to `skills_categories`.
- Improved error handling in the `AIAgent` class by injecting a recovery message into the conversation when invalid JSON arguments are detected, guiding the model on how to correct its tool calls.
- Enhanced the `GatewayRunner` to return a user-friendly error message if the agent fails to generate a final response, improving overall user experience.

											
										
										
											2026-02-03 15:26:59 -08:00
+								                            # Instead of returning partial, inject a helpful message and let model recover
 								                            print(f"{self.log_prefix}⚠️  Injecting recovery message for invalid JSON...")
 								                            self._invalid_json_retries = 0  # Reset for next attempt
 								                            # Add a user message explaining the issue
 								                            recovery_msg = (
 								                                f"Your tool call to '{tool_name}' had invalid JSON arguments. "
 								                                f"Error: {error_msg}. "
 								                                f"For tools with no required parameters, use an empty object: {{}}. "
 								                                f"Please either retry the tool call with valid JSON, or respond without using that tool."
 								                            )
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                            recovery_dict = {"role": "user", "content": recovery_msg}
 								                            messages.append(recovery_dict)
-												Enhance skills tool to have an arg so it is more reliably called, and error handling in agent

- Updated the `skills_categories` function to include a `verbose` parameter, allowing users to request skill counts per category.
- Modified the `handle_skills_function_call` method to pass the `verbose` argument to `skills_categories`.
- Improved error handling in the `AIAgent` class by injecting a recovery message into the conversation when invalid JSON arguments are detected, guiding the model on how to correct its tool calls.
- Enhanced the `GatewayRunner` to return a user-friendly error message if the agent fails to generate a final response, improving overall user experience.

											
										
										
											2026-02-03 15:26:59 -08:00
+								                            continue
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
 								                    # Reset retry counter on successful JSON validation
 								                    self._invalid_json_retries = 0
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                    assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
-												Implement reasoning extraction and enhance assistant message handling

- Added a new method `_extract_reasoning` to extract reasoning content from assistant messages, accommodating multiple formats from various providers.
- Updated message handling to ensure all assistant messages include reasoning content for API compatibility, preserving multi-turn reasoning context.
- Enhanced logging to capture reasoning details for debugging and analysis.
- Modified the TODO.md to reflect changes in planning and task management, emphasizing the need for structured task decomposition and progress tracking.

											
										
										
											2026-02-01 22:48:18 -08:00
-												feat: enhance AIAgent's tool usage nudges and content handling

- Introduced a method to strip <think> blocks from content, improving text visibility.
- Implemented counters to reset nudge intervals when memory and skill tools are used, enhancing user guidance.
- Captured content from turns with tool calls to provide fallback responses, ensuring continuity in conversation.
- Updated nudge logic to remind users about saving memories and creating skills based on interaction patterns.

											
										
										
											2026-02-22 21:33:28 -08:00
+								                    # If this turn has both content AND tool_calls, capture the content
 								                    # as a fallback final response. Common pattern: model delivers its
 								                    # answer and calls memory/skill tools as a side-effect in the same
 								                    # turn. If the follow-up turn after tools is empty, we use this.
 								                    turn_content = assistant_message.content or ""
 								                    if turn_content and self._has_content_after_think_block(turn_content):
 								                        self._last_content_with_tools = turn_content
 								                        # Show intermediate commentary so the user can follow along
 								                        if self.quiet_mode:
 								                            clean = self._strip_think_blocks(turn_content).strip()
 								                            if clean:
-												fix(agent): remove preview truncation in assistant message output

Updated the AIAgent class to print the full content of assistant messages without truncation, enhancing visibility of the messages during runtime. This change improves the clarity of communication from the agent.

											
										
										
											2026-03-02 00:32:06 -08:00
+								                                print(f"  ┊ 💬 {clean}")
-												feat: enhance AIAgent's tool usage nudges and content handling

- Introduced a method to strip <think> blocks from content, improving text visibility.
- Implemented counters to reset nudge intervals when memory and skill tools are used, enhancing user guidance.
- Captured content from turns with tool calls to provide fallback responses, ensuring continuity in conversation.
- Updated nudge logic to remind users about saving memories and creating skills based on interaction patterns.

											
										
										
											2026-02-22 21:33:28 -08:00
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                    messages.append(assistant_msg)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
-												fix: proactive compression after large tool results + Anthropic error detection

Two fixes for context overflow handling:

1. Proactive compression after tool execution: The compression check now
   estimates the next prompt size using real token counts from the last API
   response (prompt_tokens + completion_tokens) plus a conservative estimate
   of newly appended tool results (chars // 3 for JSON-heavy content).
   Previously, should_compress() only checked last_prompt_tokens which
   didn't account for tool results — so a 130k prompt + 100k chars of tool
   output would pass the 140k threshold check but fail the 200k API limit.

2. Safety net: Added 'prompt is too long' to context-length error detection
   phrases. Anthropic returns 'prompt is too long: N tokens > M maximum'
   on HTTP 400, which wasn't matched by existing phrases. This ensures
   compression fires even if the proactive check underestimates.

Fixes #813

											
										
										
											2026-03-11 08:04:52 -07:00
+								                    _msg_count_before_tools = len(messages)
-												feat: iteration budget pressure via tool result injection

Two-tier warning system that nudges the LLM as it approaches
max_iterations, injected into the last tool result JSON rather
than as a separate system message:

- Caution (70%): {"_budget_warning": "[BUDGET: 42/60...]"}
- Warning (90%): {"_budget_warning": "[BUDGET WARNING: 54/60...]"}

For JSON tool results, adds a _budget_warning field to the existing
dict. For plain text results, appends the warning as text.

Key properties:
- No system messages injected mid-conversation
- No changes to message structure
- Prompt cache stays valid
- Configurable thresholds (0.7 / 0.9)
- Can be disabled: _budget_pressure_enabled = False

Inspired by PR #421 (@Bartok9) and issue #414.
8 tests covering thresholds, edge cases, JSON and text injection.

											
										
										
											2026-03-09 05:19:42 -07:00
+								                    self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)
-												feat: shared iteration budget across parent + subagents

Subagent tool calls now count toward the same session-wide iteration
limit as the parent agent. Previously, each subagent had its own
independent counter, so a parent with max_iterations=60 could spawn
3 subagents each doing 50 calls = 150 total tool calls unmetered.

Changes:
- IterationBudget: thread-safe shared counter (run_agent.py)
  - consume(): try to use one iteration, returns False if exhausted
  - refund(): give back one iteration (for execute_code turns)
  - Thread-safe via Lock (subagents run in ThreadPoolExecutor)
- Parent creates the budget, children inherit it via delegate_tool.py
- execute_code turns are refunded (don't count against budget)
- Default raised from 60 → 90 to account for shared consumption
- Per-child cap (50) still applies as a safety valve

The per-child max_iterations (default 50) remains as a per-child
ceiling, but the shared budget is the hard session-wide limit.
A child stops at whichever comes first.

											
										
										
											2026-03-07 08:16:37 -08:00
 								                    # Refund the iteration if the ONLY tool(s) called were
 								                    # execute_code (programmatic tool calling).  These are
 								                    # cheap RPC-style calls that shouldn't eat the budget.
 								                    _tc_names = {tc.function.name for tc in assistant_message.tool_calls}
 								                    if _tc_names == {"execute_code"}:
 								                        self.iteration_budget.refund()
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
-												fix: proactive compression after large tool results + Anthropic error detection

Two fixes for context overflow handling:

1. Proactive compression after tool execution: The compression check now
   estimates the next prompt size using real token counts from the last API
   response (prompt_tokens + completion_tokens) plus a conservative estimate
   of newly appended tool results (chars // 3 for JSON-heavy content).
   Previously, should_compress() only checked last_prompt_tokens which
   didn't account for tool results — so a 130k prompt + 100k chars of tool
   output would pass the 140k threshold check but fail the 200k API limit.

2. Safety net: Added 'prompt is too long' to context-length error detection
   phrases. Anthropic returns 'prompt is too long: N tokens > M maximum'
   on HTTP 400, which wasn't matched by existing phrases. This ensures
   compression fires even if the proactive check underestimates.

Fixes #813

											
										
										
											2026-03-11 08:04:52 -07:00
+								                    # Estimate next prompt size using real token counts from the
 								                    # last API response + rough estimate of newly appended tool
 								                    # results.  This catches cases where tool results push the
 								                    # context past the limit that last_prompt_tokens alone misses
 								                    # (e.g. large file reads, web extractions).
 								                    _compressor = self.context_compressor
 								                    _new_tool_msgs = messages[_msg_count_before_tools:]
 								                    _new_chars = sum(len(str(m.get("content", "") or "")) for m in _new_tool_msgs)
 								                    _estimated_next_prompt = (
 								                        _compressor.last_prompt_tokens
 								                        + _compressor.last_completion_tokens
 								                        + _new_chars // 3  # conservative: JSON-heavy tool results ≈ 3 chars/token
 								                    )
 								                    if self.compression_enabled and _compressor.should_compress(_estimated_next_prompt):
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                        messages, active_system_prompt = self._compress_context(
 								                            messages, system_message,
-												fix: detect and warn on file re-read loops after context compression

When context compression summarizes conversation history, the agent
loses track of which files it already read and re-reads them in a loop.
Users report the agent reading the same files endlessly without writing.

Root cause: context compression is lossy — file contents and read history
are lost in the summary. After compression, the model thinks it hasn't
examined the files yet and reads them again.

Fix (two-part):
1. Track file reads per task in file_tools.py. When the same file region
   is read again, include a _warning in the response telling the model
   to stop re-reading and use existing information.
2. After context compression, inject a structured message listing all
   files already read in the session with explicit "do NOT re-read"
   instruction, preserving read history across compression boundaries.

Adds 16 tests covering warning detection, task isolation, summary
accuracy, tracker cleanup, and compression history injection.

											
										
										
											2026-03-08 20:44:42 +03:00
+								                            approx_tokens=self.context_compressor.last_prompt_tokens,
 								                            task_id=effective_task_id,
-												Add context compression feature for long conversations

- Implemented automatic context compression to manage long conversations that approach the model's context limit.
- Configured the feature to summarize middle turns while protecting the first three and last four turns, ensuring important context is retained.
- Added configuration options in `cli-config.yaml` and environment variables for enabling/disabling compression and setting thresholds.
- Updated documentation in `README.md`, `cli.md`, and `.env.example` to explain the context compression functionality and its configuration.
- Enhanced `cli.py` to load compression settings into environment variables, ensuring seamless integration with the CLI.
- Completed the implementation of context compression as outlined in the TODO list, marking it as a significant enhancement to conversation management.

											
										
										
											2026-02-01 18:01:31 -08:00
+								                        )
-												Implement reasoning extraction and enhance assistant message handling

- Added a new method `_extract_reasoning` to extract reasoning content from assistant messages, accommodating multiple formats from various providers.
- Updated message handling to ensure all assistant messages include reasoning content for API compatibility, preserving multi-turn reasoning context.
- Enhanced logging to capture reasoning details for debugging and analysis.
- Modified `TODO.md` to reflect changes in planning and task management, emphasizing the need for structured task decomposition and progress tracking.

											
										
										
											2026-02-01 22:48:18 -08:00
+								                    # Save session log incrementally (so progress is visible even if interrupted)
 								                    self._session_messages = messages
 								                    self._save_session_log(messages)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								                    # Continue loop for next response
 								                    continue
 								                else:
 								                    # No tool calls - this is the final response
 								                    final_response = assistant_message.content or ""
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								                    # Check if response only has think block with no actual content after it
 								                    if not self._has_content_after_think_block(final_response):
-												fix: improve handling of empty responses after tool calls

- Added fallback mechanism to utilize previous content when the model generates an empty response after tool calls, reducing unnecessary API retries.
- Enhanced logging to indicate when prior content is used as a final response.
- Updated logic to ensure that genuine empty responses are retried appropriately, maintaining user experience.

											
										
										
											2026-03-06 16:54:31 -08:00
+								                        # If the previous turn already delivered real content alongside
 								                        # tool calls (e.g. "You're welcome!" + memory save), the model
 								                        # has nothing more to say. Use the earlier content immediately
 								                        # instead of wasting API calls on retries that won't help.
 								                        fallback = getattr(self, '_last_content_with_tools', None)
 								                        if fallback:
 								                            logger.debug("Empty follow-up after tool calls — using prior turn content as final response")
 								                            self._last_content_with_tools = None
 								                            self._empty_content_retries = 0
 								                            for i in range(len(messages) - 1, -1, -1):
 								                                msg = messages[i]
 								                                if msg.get("role") == "assistant" and msg.get("tool_calls"):
 								                                    tool_names = []
 								                                    for tc in msg["tool_calls"]:
 								                                        fn = tc.get("function", {})
 								                                        tool_names.append(fn.get("name", "unknown"))
 								                                    msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
 								                                    break
 								                            final_response = self._strip_think_blocks(fallback).strip()
 								                            break
 								                        # No fallback available — this is a genuine empty response.
 								                        # Retry in case the model just had a bad generation.
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								                        if not hasattr(self, '_empty_content_retries'):
 								                            self._empty_content_retries = 0
 								                        self._empty_content_retries += 1
-												feat: update database schema and enhance message persistence

- Incremented schema version to 2 and added a new column `finish_reason` to the `messages` table.
- Implemented a method to flush un-logged messages to the session database, ensuring data integrity during conversation interruptions.
- Enhanced error handling to persist messages in various early-return scenarios, preventing data loss.

											
										
										
											2026-02-21 00:05:39 -08:00
+								                        reasoning_text = self._extract_reasoning(assistant_message)
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								                        print(f"{self.log_prefix}⚠️  Response only contains think block with no content after it")
-												feat: update database schema and enhance message persistence

- Incremented schema version to 2 and added a new column `finish_reason` to the `messages` table.
- Implemented a method to flush un-logged messages to the session database, ensuring data integrity during conversation interruptions.
- Enhanced error handling to persist messages in various early-return scenarios, preventing data loss.

											
										
										
											2026-02-21 00:05:39 -08:00
+								                        if reasoning_text:
 								                            reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
 								                            print(f"{self.log_prefix}   Reasoning: {reasoning_preview}")
 								                        else:
 								                            content_preview = final_response[:80] + "..." if len(final_response) > 80 else final_response
 								                            print(f"{self.log_prefix}   Content: '{content_preview}'")
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
 								                        if self._empty_content_retries < 3:
 								                            print(f"{self.log_prefix}🔄 Retrying API call ({self._empty_content_retries}/3)...")
 								                            continue
 								                        else:
-												feat: update database schema and enhance message persistence

- Incremented schema version to 2 and added a new column `finish_reason` to the `messages` table.
- Implemented a method to flush un-logged messages to the session database, ensuring data integrity during conversation interruptions.
- Enhanced error handling to persist messages in various early-return scenarios, preventing data loss.

											
										
										
											2026-02-21 00:05:39 -08:00
+								                            print(f"{self.log_prefix}❌ Max retries (3) for empty content exceeded.")
 								                            self._empty_content_retries = 0
-												feat: enhance AIAgent's tool usage nudges and content handling

- Introduced a method to strip <think> blocks from content, improving text visibility.
- Implemented counters to reset nudge intervals when memory and skill tools are used, enhancing user guidance.
- Captured content from turns with tool calls to provide fallback responses, ensuring continuity in conversation.
- Updated nudge logic to remind users about saving memories and creating skills based on interaction patterns.

											
										
										
											2026-02-22 21:33:28 -08:00
+								                            # If a prior tool_calls turn had real content, salvage it:
 								                            # rewrite that turn's content to a brief tool description,
 								                            # and use the original content as the final response here.
 								                            fallback = getattr(self, '_last_content_with_tools', None)
 								                            if fallback:
 								                                self._last_content_with_tools = None
 								                                # Find the last assistant message with tool_calls and rewrite it
 								                                for i in range(len(messages) - 1, -1, -1):
 								                                    msg = messages[i]
 								                                    if msg.get("role") == "assistant" and msg.get("tool_calls"):
 								                                        tool_names = []
 								                                        for tc in msg["tool_calls"]:
 								                                            fn = tc.get("function", {})
 								                                            tool_names.append(fn.get("name", "unknown"))
 								                                        msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
 								                                        break
-												fix: strip <think> blocks from final response to users

Fixes #149

The _strip_think_blocks() method existed but was not applied to the
final_response in the normal completion path. This caused <think>...</think>
XML tags to leak into user-facing responses on all platforms (CLI, Telegram,
Discord, Slack, WhatsApp).

Changes:
- Strip think blocks from final_response before returning in the normal path (line ~2600)
- Strip think blocks from fallback content when salvaging from prior tool_calls turn

Notes:
- The raw content with think blocks is preserved in messages[] for trajectory
  export - this only affects the user-facing final_response
- The _has_content_after_think_block() check still uses raw content before
  stripping, which is correct for detecting think-only responses

											
										
										
											2026-02-28 03:06:20 -05:00
+								                                # Strip <think> blocks from fallback content for user display
 								                                final_response = self._strip_think_blocks(fallback).strip()
-												feat: enhance AIAgent's tool usage nudges and content handling

- Introduced a method to strip <think> blocks from content, improving text visibility.
- Implemented counters to reset nudge intervals when memory and skill tools are used, enhancing user guidance.
- Captured content from turns with tool calls to provide fallback responses, ensuring continuity in conversation.
- Updated nudge logic to remind users about saving memories and creating skills based on interaction patterns.

											
										
										
											2026-02-22 21:33:28 -08:00
+								                                break
 								                            # No fallback -- append the empty message as-is
-												feat: update database schema and enhance message persistence

- Incremented schema version to 2 and added a new column `finish_reason` to the `messages` table.
- Implemented a method to flush un-logged messages to the session database, ensuring data integrity during conversation interruptions.
- Enhanced error handling to persist messages in various early-return scenarios, preventing data loss.

											
										
										
											2026-02-21 00:05:39 -08:00
+								                            empty_msg = {
 								                                "role": "assistant",
 								                                "content": final_response,
 								                                "reasoning": reasoning_text,
 								                                "finish_reason": finish_reason,
 								                            }
 								                            messages.append(empty_msg)
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
-												Cleanup time!

											
										
										
											2026-02-20 23:23:32 -08:00
+								                            self._cleanup_task_resources(effective_task_id)
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                            self._persist_session(messages, conversation_history)
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
 								                            return {
-												feat: update database schema and enhance message persistence

- Incremented schema version to 2 and added a new column `finish_reason` to the `messages` table.
- Implemented a method to flush un-logged messages to the session database, ensuring data integrity during conversation interruptions.
- Enhanced error handling to persist messages in various early-return scenarios, preventing data loss.

											
										
										
											2026-02-21 00:05:39 -08:00
+								                                "final_response": final_response or None,
 								                                "messages": messages,
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
+								                                "api_calls": api_call_count,
 								                                "completed": False,
 								                                "partial": True,
 								                                "error": "Model generated only think blocks with no actual response after 3 retries"
 								                            }
 								                    # Reset retry counter on successful content
 								                    if hasattr(self, '_empty_content_retries'):
 								                        self._empty_content_retries = 0
-												Fix Codex Responses continuation and schema parity

											
										
										
											2026-02-25 18:11:26 -08:00
 								                    if (
 								                        self.api_mode == "codex_responses"
 								                        and self.valid_tool_names
 								                        and codex_ack_continuations < 2
 								                        and self._looks_like_codex_intermediate_ack(
 								                            user_message=user_message,
 								                            assistant_content=final_response,
 								                            messages=messages,
 								                        )
 								                    ):
 								                        codex_ack_continuations += 1
 								                        interim_msg = self._build_assistant_message(assistant_message, "incomplete")
 								                        messages.append(interim_msg)
 								                        continue_msg = {
 								                            "role": "user",
 								                            "content": (
 								                                "[System: Continue now. Execute the required tool calls and only "
 								                                "send your final answer after completing the task.]"
 								                            ),
 								                        }
 								                        messages.append(continue_msg)
 								                        self._session_messages = messages
 								                        self._save_session_log(messages)
 								                        continue
 								                    codex_ack_continuations = 0
-												fix: continue after output-length truncation

											
										
										
											2026-03-07 18:45:17 -05:00
 								                    if truncated_response_prefix:
 								                        final_response = truncated_response_prefix + final_response
-												Implement enhanced response handling and tool call validation in run_agent

- Added methods to check for meaningful content after <think> blocks and to retrieve messages up to the last complete assistant turn.
- Introduced retry logic for handling truncated responses and invalid JSON arguments in tool calls, with a maximum retry limit.
- Improved logging for invalid JSON and empty responses, ensuring better error tracking and handling.
- Updated the batch data generation script to adjust dataset file, batch size, and ephemeral system prompt for improved context management.

											
										
										
											2026-01-10 13:04:43 +00:00
-												fix: strip <think> blocks from final response to users

Fixes #149

The _strip_think_blocks() method existed but was not applied to the
final_response in the normal completion path. This caused <think>...</think>
XML tags to leak into user-facing responses on all platforms (CLI, Telegram,
Discord, Slack, WhatsApp).

Changes:
- Strip think blocks from final_response before returning in the normal path (line ~2600)
- Strip think blocks from fallback content when salvaging from prior tool_calls turn

Notes:
- The raw content with think blocks is preserved in messages[] for trajectory
  export - this only affects the user-facing final_response
- The _has_content_after_think_block() check still uses raw content before
  stripping, which is correct for detecting think-only responses

											
										
										
											2026-02-28 03:06:20 -05:00
+								                    # Strip <think> blocks from user-facing response (keep raw in messages for trajectory)
 								                    final_response = self._strip_think_blocks(final_response).strip()
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								                    final_msg = self._build_assistant_message(assistant_message, finish_reason)
-												Implement reasoning extraction and enhance assistant message handling

- Added a new method `_extract_reasoning` to extract reasoning content from assistant messages, accommodating multiple formats from various providers.
- Updated message handling to ensure all assistant messages include reasoning content for API compatibility, preserving multi-turn reasoning context.
- Enhanced logging to capture reasoning details for debugging and analysis.
- Modified `TODO.md` to reflect changes in planning and task management, emphasizing the need for structured task decomposition and progress tracking.

											
										
										
											2026-02-01 22:48:18 -08:00
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								                    messages.append(final_msg)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
-												Add a claude code-like CLI

- Introduced `cli-config.yaml.example` to provide a template for configuring the CLI behavior, including model settings, terminal tool configurations, agent behavior, and toolsets.
- Created `cli.py` for an interactive terminal interface, allowing users to start the Hermes Agent with various options and toolsets.
- Added `hermes` launcher script for convenient CLI access.
- Updated `model_tools.py` to support quiet mode for suppressing output during tool initialization and execution.
- Enhanced logging in various tools to respect quiet mode, improving user experience by reducing unnecessary output.
- Added `prompt_toolkit` to `requirements.txt` for improved CLI interaction capabilities.
- Created `TODO.md` for future improvements and enhancements to the Hermes Agent framework.

											
										
										
											2026-01-31 06:30:48 +00:00
+								                    if not self.quiet_mode:
 								                        print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								                    break
 								            except Exception as e:
-												rate limits

											
										
										
											2025-11-17 18:35:36 -05:00
+								                error_msg = f"Error during OpenAI-compatible API call #{api_call_count}: {str(e)}"
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								                print(f"❌ {error_msg}")
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								                if self.verbose_logging:
 								                    logging.exception("Detailed error information:")
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								                # If an assistant message with tool_calls was already appended,
 								                # the API expects a role="tool" result for every tool_call_id.
 								                # Fill in error results for any that weren't answered yet.
 								                pending_handled = False
 								                for idx in range(len(messages) - 1, -1, -1):
 								                    msg = messages[idx]
 								                    if not isinstance(msg, dict):
 								                        break
 								                    if msg.get("role") == "tool":
 								                        continue
 								                    if msg.get("role") == "assistant" and msg.get("tool_calls"):
 								                        answered_ids = {
 								                            m["tool_call_id"]
 								                            for m in messages[idx + 1:]
 								                            if isinstance(m, dict) and m.get("role") == "tool"
 								                        }
 								                        for tc in msg["tool_calls"]:
 								                            if tc["id"] not in answered_ids:
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                                err_msg = {
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								                                    "role": "tool",
 								                                    "tool_call_id": tc["id"],
 								                                    "content": f"Error executing tool: {error_msg}",
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                                }
 								                                messages.append(err_msg)
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								                        pending_handled = True
 								                    break
 								                if not pending_handled:
 								                    # Error happened before tool processing (e.g. response parsing).
 								                    # Use a user-role message so the model can see what went wrong
 								                    # without confusing the API with a fabricated assistant turn.
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                    sys_err_msg = {
-												Hermes Agent UX Improvements

											
										
										
											2026-02-22 02:16:11 -08:00
+								                        "role": "user",
 								                        "content": f"[System error during processing: {error_msg}]",
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								                    }
 								                    messages.append(sys_err_msg)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
 								                # If we're near the limit, break to avoid infinite loops
 								                if api_call_count >= self.max_iterations - 1:
 								                    final_response = f"I apologize, but I encountered repeated errors: {error_msg}"
 								                    break
-												Critical: fix bug in new subagent tool call budget to not be session-level but tool call loop level

											
										
										
											2026-03-07 10:32:51 -08:00
+								        if final_response is None and (
 								            api_call_count >= self.max_iterations
 								            or self.iteration_budget.remaining <= 0
 								        ):
 								            if self.iteration_budget.remaining <= 0 and not self.quiet_mode:
 								                print(f"\n⚠️  Session iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} used, including subagents)")
-												refactor: enhance API interaction and message handling in AIAgent

- Introduced new methods in run_agent.py for building API keyword arguments and normalizing assistant messages from API responses.
- Added functionality for compressing conversation context and managing session state in SQLite.
- Improved tool call execution handling, including enhanced logging and error management.
- Updated path handling in multiple platform files to utilize pathlib for better compatibility and readability.

											
										
										
											2026-02-21 04:17:27 -08:00
+								            final_response = self._handle_max_iterations(messages, api_call_count)
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
 								        # Determine if conversation completed successfully
 								        completed = final_response is not None and api_call_count < self.max_iterations
-												prevent leakage of morph instances between tasks

											
										
										
											2025-11-04 03:32:43 -05:00
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        # Save trajectory if enabled
 								        self._save_trajectory(messages, user_message, completed)
-												prevent leakage of morph instances between tasks

											
										
										
											2025-11-04 03:32:43 -05:00
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								        # Clean up VM and browser for this task after conversation completes
-												Cleanup time!

											
										
										
											2026-02-20 23:23:32 -08:00
+								        self._cleanup_task_resources(effective_task_id)
-												prevent leakage of morph instances between tasks

											
										
										
											2025-11-04 03:32:43 -05:00
-												Fix session saving to DB with full conversation history (not just user/assistant messages without tool calls)

											
										
										
											2026-02-22 17:10:24 -08:00
+								        # Persist session to both JSON log and SQLite
 								        self._persist_session(messages, conversation_history)
-												feat: add Honcho AI-native memory integration

Opt-in persistent cross-session user modeling via Honcho. Reads
~/.honcho/config.json as single source of truth (shared with
Claude Code, Cursor, and other Honcho-enabled tools). Zero impact
when disabled or unconfigured.

- honcho_integration/ package (client, session manager, peer resolution)
- Host-based config resolution matching claude-honcho/cursor-honcho pattern
- Prefetch user context into system prompt per conversation turn
- Sync user/assistant messages to Honcho after each exchange
- query_user_context tool for mid-conversation dialectic reasoning
- Gated activation: requires ~/.honcho/config.json with enabled=true

											
										
										
											2026-02-25 19:34:25 -05:00
 								        # Sync conversation to Honcho for user modeling
 								        if final_response and not interrupted:
 								            self._honcho_sync(original_user_message, final_response)
-												feat(cli): add /reasoning command for effort level and display toggle

Combined implementation of reasoning management:
- /reasoning              Show current effort level and display state
- /reasoning <level>      Set reasoning effort (none, low, medium, high, xhigh)
- /reasoning show|on      Show model thinking/reasoning in output
- /reasoning hide|off     Hide model thinking/reasoning from output

Effort level changes persist to config and force agent re-init.
Display toggle updates the agent callback dynamically without re-init.

When display is enabled:
- Intermediate reasoning shown as dim [thinking] lines during tool loops
- Final reasoning shown in a bordered box above the response
- Long reasoning collapsed (5 lines intermediate, 10 lines final)

Also adds:
- reasoning_callback parameter to AIAgent
- last_reasoning in run_conversation result dict
- show_reasoning config option (display section, default: false)
- Display section in /config output
- 34 tests covering both features

Combines functionality from PR #789 and PR #790.

Co-authored-by: Aum Desai <Aum08Desai@users.noreply.github.com>
Co-authored-by: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>

											
										
										
											2026-03-11 05:53:21 -07:00
+								        # Extract reasoning from the last assistant message (if any)
 								        last_reasoning = None
 								        for msg in reversed(messages):
 								            if msg.get("role") == "assistant" and msg.get("reasoning"):
 								                last_reasoning = msg["reasoning"]
 								                break
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								        # Build result with interrupt info if applicable
 								        result = {
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								            "final_response": final_response,
-												feat(cli): add /reasoning command for effort level and display toggle

Combined implementation of reasoning management:
- /reasoning              Show current effort level and display state
- /reasoning <level>      Set reasoning effort (none, low, medium, high, xhigh)
- /reasoning show|on      Show model thinking/reasoning in output
- /reasoning hide|off     Hide model thinking/reasoning from output

Effort level changes persist to config and force agent re-init.
Display toggle updates the agent callback dynamically without re-init.

When display is enabled:
- Intermediate reasoning shown as dim [thinking] lines during tool loops
- Final reasoning shown in a bordered box above the response
- Long reasoning collapsed (5 lines intermediate, 10 lines final)

Also adds:
- reasoning_callback parameter to AIAgent
- last_reasoning in run_conversation result dict
- show_reasoning config option (display section, default: false)
- Display section in /config output
- 34 tests covering both features

Combines functionality from PR #789 and PR #790.

Co-authored-by: Aum Desai <Aum08Desai@users.noreply.github.com>
Co-authored-by: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>

											
										
										
											2026-03-11 05:53:21 -07:00
+								            "last_reasoning": last_reasoning,
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								            "messages": messages,
 								            "api_calls": api_call_count,
-												Enhance batch processing and tool validation

- Added support for tracking partial results and tool error counts in batch processing.
- Implemented filtering of corrupted entries during batch file combination based on valid tool names.
- Updated terminal tool to improve command execution and error handling, including retry logic for transient failures.
- Refactored model tools to use a simple terminal tool with no session persistence.
- Improved logging and error messages for invalid API responses and tool calls.
- Introduced chunked processing for large content in web tools to manage size limitations effectively.

											
										
										
											2026-01-10 05:56:26 +00:00
+								            "completed": completed,
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
+								            "partial": False,  # True only when stopped due to invalid tool calls
 								            "interrupted": interrupted,
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        }
-												Implement interrupt handling for agent and CLI input and persistent prompt line at bottom of CLI :)

- Enhanced the AIAgent class to support interrupt requests, allowing for graceful interruption of ongoing tasks and processing of new messages.
- Updated the HermesCLI to manage user input in a persistent manner, enabling real-time interruption of the agent's conversation.
- Introduced a mechanism in the GatewayRunner to handle incoming messages while an agent is running, allowing for immediate response to user commands.
- Improved overall user experience by providing feedback during interruptions and ensuring that pending messages are processed correctly.

											
										
										
											2026-02-03 16:15:49 -08:00
 								        # Include interrupt message if one triggered the interrupt
 								        if interrupted and self._interrupt_message:
 								            result["interrupt_message"] = self._interrupt_message
 								        # Clear interrupt state after handling
 								        self.clear_interrupt()
 								        return result
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
 								    def chat(self, message: str) -> str:
 								        """
 								        Convenience wrapper that runs one conversation and hands back only the reply.
 								        Delegates to run_conversation() and discards everything in its result
 								        except the entry stored under "final_response".
 								        Args:
 								            message (str): The user's message to send to the agent
 								        Returns:
 								            str: The "final_response" value from the run_conversation() result
 								        """
 								        return self.run_conversation(message)["final_response"]
 								def main(
 								    query: str = None,
-												Enhance BatchRunner and AIAgent with new configuration options, default model now opus 4.6, default summarizer gemini flash 3

- Added `max_tokens`, `reasoning_config`, and `prefill_messages` parameters to `BatchRunner` and `AIAgent` for improved model response control.
- Updated CLI to support new options for reasoning effort and prefill messages from a JSON file.
- Modified example configuration files to reflect changes in default model and summary model.
- Improved error handling for loading prefill messages and reasoning configurations in the CLI.
- Updated documentation to include new parameters and usage examples.

											
										
										
											2026-02-08 10:49:24 +00:00
+								    model: str = "anthropic/claude-opus-4.6",
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								    api_key: str = None,
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								    base_url: str = "https://openrouter.ai/api/v1",
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								    max_turns: int = 10,
 								    enabled_toolsets: str = None,
 								    disabled_toolsets: str = None,
 								    list_tools: bool = False,
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								    save_trajectories: bool = False,
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								    save_sample: bool = False,
-												add logging of prefix of tool call and tool response

											
										
										
											2025-11-07 14:43:44 -05:00
+								    verbose: bool = False,
 								    log_prefix_chars: int = 20
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								):
 								    """
 								    Main function for running the agent directly.
-												add logging of prefix of tool call and tool response

											
										
										
											2025-11-07 14:43:44 -05:00
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								    Args:
 								        query (str): Natural language query for the agent. Defaults to Python 3.13 example.
-												fix: update model version in agent configurations

Updated the default model version from "anthropic/claude-sonnet-4-20250514" to "anthropic/claude-sonnet-4.6" across multiple files including AGENTS.md, batch_runner.py, mini_swe_runner.py, and run_agent.py for consistency and to reflect the latest model improvements.

											
										
										
											2026-03-07 09:06:37 -08:00
+								        model (str): Model name to use (OpenRouter format: provider/model). Defaults to anthropic/claude-opus-4.6.
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								        api_key (str): API key for authentication. Uses OPENROUTER_API_KEY env var if not provided.
 								        base_url (str): Base URL for the model API. Defaults to https://openrouter.ai/api/v1
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        max_turns (int): Maximum number of API call iterations. Defaults to 10.
-												add logging of prefix of tool call and tool response

											
										
										
											2025-11-07 14:43:44 -05:00
+								        enabled_toolsets (str): Comma-separated list of toolsets to enable. Supports predefined
 								                              toolsets (e.g., "research", "development", "safe").
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								                              Multiple toolsets can be combined: "web,vision"
 								        disabled_toolsets (str): Comma-separated list of toolsets to disable (e.g., "terminal")
 								        list_tools (bool): Just list available tools and exit
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								        save_trajectories (bool): Save conversation trajectories to JSONL files (appends to trajectory_samples.jsonl). Defaults to False.
 								        save_sample (bool): Save a single trajectory sample to a pretty-printed JSON file named sample_<id>.json (id = first 8 characters of a UUID) for inspection. Defaults to False.
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								        verbose (bool): Enable verbose logging for debugging. Defaults to False.
-												add logging of prefix of tool call and tool response

											
										
										
											2025-11-07 14:43:44 -05:00
+								        log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses. Defaults to 20.
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								    Toolset Examples:
 								        - "research": Web search, extract, crawl + vision tools
 								    """
 								    print("🤖 AI Agent with Tool Calling")
 								    print("=" * 50)
 								    # Handle tool listing
 								    if list_tools:
 								        from model_tools import get_all_tool_names, get_toolset_for_tool, get_available_toolsets
 								        from toolsets import get_all_toolsets, get_toolset_info
 								        print("📋 Available Tools & Toolsets:")
 								        print("-" * 50)
 								        # Show new toolsets system
 								        print("\n🎯 Predefined Toolsets (New System):")
 								        print("-" * 40)
 								        all_toolsets = get_all_toolsets()
 								        # Group by category
 								        basic_toolsets = []
 								        composite_toolsets = []
 								        scenario_toolsets = []
 								        for name, toolset in all_toolsets.items():
 								            info = get_toolset_info(name)
 								            if info:
 								                entry = (name, info)
 								                if name in ["web", "terminal", "vision", "creative", "reasoning"]:
 								                    basic_toolsets.append(entry)
 								                elif name in ["research", "development", "analysis", "content_creation", "full_stack"]:
 								                    composite_toolsets.append(entry)
 								                else:
 								                    scenario_toolsets.append(entry)
 								        # Print basic toolsets
 								        print("\n📌 Basic Toolsets:")
 								        for name, info in basic_toolsets:
 								            tools_str = ', '.join(info['resolved_tools']) if info['resolved_tools'] else 'none'
 								            print(f"  • {name:15} - {info['description']}")
 								            print(f"    Tools: {tools_str}")
 								        # Print composite toolsets
 								        print("\n📂 Composite Toolsets (built from other toolsets):")
 								        for name, info in composite_toolsets:
 								            includes_str = ', '.join(info['includes']) if info['includes'] else 'none'
 								            print(f"  • {name:15} - {info['description']}")
 								            print(f"    Includes: {includes_str}")
 								            print(f"    Total tools: {info['tool_count']}")
 								        # Print scenario-specific toolsets
 								        print("\n🎭 Scenario-Specific Toolsets:")
 								        for name, info in scenario_toolsets:
 								            print(f"  • {name:20} - {info['description']}")
 								            print(f"    Total tools: {info['tool_count']}")
 								        # Show legacy toolset compatibility
 								        print("\n📦 Legacy Toolsets (for backward compatibility):")
 								        legacy_toolsets = get_available_toolsets()
 								        for name, info in legacy_toolsets.items():
 								            status = "✅" if info["available"] else "❌"
 								            print(f"  {status} {name}: {info['description']}")
 								            if not info["available"]:
 								                print(f"    Requirements: {', '.join(info['requirements'])}")
 								        # Show individual tools
 								        all_tools = get_all_tool_names()
 								        print(f"\n🔧 Individual Tools ({len(all_tools)} available):")
 								        for tool_name in sorted(all_tools):
 								            toolset = get_toolset_for_tool(tool_name)
 								            print(f"  📌 {tool_name} (from {toolset})")
 								        print(f"\n💡 Usage Examples:")
 								        print(f"  # Use predefined toolsets")
 								        print(f"  python run_agent.py --enabled_toolsets=research --query='search for Python news'")
 								        print(f"  python run_agent.py --enabled_toolsets=development --query='debug this code'")
 								        print(f"  python run_agent.py --enabled_toolsets=safe --query='analyze without terminal'")
 								        print(f"  ")
 								        print(f"  # Combine multiple toolsets")
 								        print(f"  python run_agent.py --enabled_toolsets=web,vision --query='analyze website'")
 								        print(f"  ")
 								        print(f"  # Disable toolsets")
 								        print(f"  python run_agent.py --disabled_toolsets=terminal --query='no command execution'")
 								        print(f"  ")
 								        print(f"  # Run with trajectory saving enabled")
 								        print(f"  python run_agent.py --save_trajectories --query='your question here'")
 								        return
 								    # Parse toolset selection arguments
 								    enabled_toolsets_list = None
 								    disabled_toolsets_list = None
 								    if enabled_toolsets:
 								        enabled_toolsets_list = [t.strip() for t in enabled_toolsets.split(",")]
 								        print(f"🎯 Enabled toolsets: {enabled_toolsets_list}")
 								    if disabled_toolsets:
 								        disabled_toolsets_list = [t.strip() for t in disabled_toolsets.split(",")]
 								        print(f"🚫 Disabled toolsets: {disabled_toolsets_list}")
 								    if save_trajectories:
 								        print(f"💾 Trajectory saving: ENABLED")
 								        print(f"   - Successful conversations → trajectory_samples.jsonl")
 								        print(f"   - Failed conversations → failed_trajectories.jsonl")
 								    # Initialize agent with provided parameters
 								    try:
 								        agent = AIAgent(
 								            base_url=base_url,
 								            model=model,
 								            api_key=api_key,
 								            max_iterations=max_turns,
 								            enabled_toolsets=enabled_toolsets_list,
 								            disabled_toolsets=disabled_toolsets_list,
-												add better logging when requests fail

											
										
										
											2025-09-10 00:51:41 -07:00
+								            save_trajectories=save_trajectories,
-												add logging of prefix of tool call and tool response

											
										
										
											2025-11-07 14:43:44 -05:00
+								            verbose_logging=verbose,
 								            log_prefix_chars=log_prefix_chars
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								        )
 								    except RuntimeError as e:
 								        print(f"❌ Failed to initialize agent: {e}")
 								        return
 								    # Use provided query or default to Python 3.13 example
 								    if query is None:
 								        user_query = (
 								            "Tell me about the latest developments in Python 3.13 and what new features "
 								            "developers should know about. Please search for current information and try it out."
 								        )
 								    else:
 								        user_query = query
 								    print(f"\n📝 User Query: {user_query}")
 								    print("\n" + "=" * 50)
 								    # Run conversation
 								    result = agent.run_conversation(user_query)
 								    print("\n" + "=" * 50)
 								    print("📋 CONVERSATION SUMMARY")
 								    print("=" * 50)
 								    print(f"✅ Completed: {result['completed']}")
 								    print(f"📞 API Calls: {result['api_calls']}")
 								    print(f"💬 Messages: {len(result['messages'])}")
 								    if result['final_response']:
 								        print(f"\n🎯 FINAL RESPONSE:")
 								        print("-" * 30)
 								        print(result['final_response'])
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								    # Save sample trajectory to UUID-named file if requested
 								    if save_sample:
 								        sample_id = str(uuid.uuid4())[:8]
-												Add timeout configuration for trajectory processing

- Updated `trajectory_compression.yaml` to include a new `per_trajectory_timeout` setting, allowing for a timeout of 300 seconds per trajectory. This enhancement helps prevent hanging on problematic entries during processing, improving overall reliability and efficiency in trajectory handling.

											
										
										
											2026-01-30 07:34:58 +00:00
+								        sample_filename = f"sample_{sample_id}.json"
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
 								        # Convert messages to trajectory format (same as batch_runner)
 								        trajectory = agent._convert_to_trajectory_format(
 								            result['messages'],
 								            user_query,
 								            result['completed']
 								        )
 								        entry = {
 								            "conversations": trajectory,
 								            "timestamp": datetime.now().isoformat(),
 								            "model": model,
 								            "completed": result['completed'],
 								            "query": user_query
 								        }
 								        try:
 								            with open(sample_filename, "w", encoding="utf-8") as f:
-												Add timeout configuration for trajectory processing

- Updated `trajectory_compression.yaml` to include a new `per_trajectory_timeout` setting, allowing for a timeout of 300 seconds per trajectory. This enhancement helps prevent hanging on problematic entries during processing, improving overall reliability and efficiency in trajectory handling.

											
										
										
											2026-01-30 07:34:58 +00:00
+								                # Pretty-print JSON with indent for readability
 								                f.write(json.dumps(entry, ensure_ascii=False, indent=2))
-												Add browser automation tools and enhance environment configuration

- Introduced new browser automation tools in `browser_tool.py` for navigating, interacting with, and extracting content from web pages using the agent-browser CLI and Browserbase cloud execution.
- Updated `.env.example` to include new configuration options for Browserbase API keys and session settings.
- Enhanced `model_tools.py` and `toolsets.py` to integrate browser tools into the existing tool framework, ensuring consistent access across toolsets.
- Updated `README.md` with setup instructions for browser tools and their usage examples.
- Added new test script `test_modal_terminal.py` to validate Modal terminal backend functionality.
- Improved `run_agent.py` to support browser tool integration and logging enhancements for better tracking of API responses.

											
										
										
											2026-01-29 06:10:24 +00:00
+								            print(f"\n💾 Sample trajectory saved to: {sample_filename}")
 								        except Exception as e:
 								            print(f"\n⚠️ Failed to save sample: {e}")
-												Update to use toolsets and make them easy to create and configure

											
										
										
											2025-09-10 00:43:55 -07:00
+								    print("\n👋 Agent execution completed!")
 								# Script entry point: runs only when this file is executed directly
 								# (not on import) and hands `main` to python-fire's `fire.Fire`.
 								if __name__ == "__main__":
 								    fire.Fire(main)