2025-09-10 00:43:55 -07:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
"""
|
|
|
|
|
|
AI Agent Runner with Tool Calling
|
|
|
|
|
|
|
|
|
|
|
|
This module provides a clean, standalone agent that can execute AI models
|
|
|
|
|
|
with tool calling capabilities. It handles the conversation loop, tool execution,
|
|
|
|
|
|
and response management.
|
|
|
|
|
|
|
|
|
|
|
|
Features:
|
|
|
|
|
|
- Automatic tool calling loop until completion
|
|
|
|
|
|
- Configurable model parameters
|
|
|
|
|
|
- Error handling and recovery
|
|
|
|
|
|
- Message history management
|
|
|
|
|
|
- Support for multiple model providers
|
|
|
|
|
|
|
|
|
|
|
|
Usage:
|
|
|
|
|
|
from run_agent import AIAgent
|
|
|
|
|
|
|
|
|
|
|
|
agent = AIAgent(base_url="http://localhost:30000/v1", model="claude-opus-4-20250514")
|
|
|
|
|
|
response = agent.run_conversation("Tell me about the latest Python updates")
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
2026-02-10 06:49:41 +00:00
|
|
|
|
import copy
|
2026-02-25 18:11:26 -08:00
|
|
|
|
import hashlib
|
2025-09-10 00:43:55 -07:00
|
|
|
|
import json
|
2025-09-10 00:51:41 -07:00
|
|
|
|
import logging
|
2026-02-21 03:11:11 -08:00
|
|
|
|
logger = logging.getLogger(__name__)
|
2025-09-10 00:43:55 -07:00
|
|
|
|
import os
|
2026-01-31 06:30:48 +00:00
|
|
|
|
import random
|
2026-02-20 23:23:32 -08:00
|
|
|
|
import re
|
2026-01-31 06:30:48 +00:00
|
|
|
|
import sys
|
2025-09-10 00:43:55 -07:00
|
|
|
|
import time
|
2026-01-31 06:30:48 +00:00
|
|
|
|
import threading
|
2026-02-25 18:20:38 -08:00
|
|
|
|
from types import SimpleNamespace
|
2026-02-01 15:36:26 -08:00
|
|
|
|
import uuid
|
2025-09-10 00:43:55 -07:00
|
|
|
|
from typing import List, Dict, Any, Optional
|
|
|
|
|
|
from openai import OpenAI
|
|
|
|
|
|
import fire
|
|
|
|
|
|
from datetime import datetime
|
2025-10-01 09:54:17 +00:00
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
2026-02-26 16:49:14 +11:00
|
|
|
|
# Load .env from ~/.hermes/.env first, then project root as dev fallback
|
2025-10-01 09:54:17 +00:00
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
|
|
|
2026-02-26 16:49:14 +11:00
|
|
|
|
_hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
|
|
|
|
|
|
_user_env = _hermes_home / ".env"
|
|
|
|
|
|
_project_env = Path(__file__).parent / '.env'
|
|
|
|
|
|
if _user_env.exists():
|
2026-02-25 15:20:42 -08:00
|
|
|
|
try:
|
2026-02-26 16:49:14 +11:00
|
|
|
|
load_dotenv(dotenv_path=_user_env, encoding="utf-8")
|
2026-02-25 15:20:42 -08:00
|
|
|
|
except UnicodeDecodeError:
|
2026-02-26 16:49:14 +11:00
|
|
|
|
load_dotenv(dotenv_path=_user_env, encoding="latin-1")
|
|
|
|
|
|
logger.info("Loaded environment variables from %s", _user_env)
|
|
|
|
|
|
elif _project_env.exists():
|
|
|
|
|
|
try:
|
|
|
|
|
|
load_dotenv(dotenv_path=_project_env, encoding="utf-8")
|
|
|
|
|
|
except UnicodeDecodeError:
|
|
|
|
|
|
load_dotenv(dotenv_path=_project_env, encoding="latin-1")
|
|
|
|
|
|
logger.info("Loaded environment variables from %s", _project_env)
|
2026-02-21 03:11:11 -08:00
|
|
|
|
else:
|
2026-02-26 16:49:14 +11:00
|
|
|
|
logger.info("No .env file found. Using system environment variables.")
|
|
|
|
|
|
|
|
|
|
|
|
# Point mini-swe-agent at ~/.hermes/ so it shares our config
|
|
|
|
|
|
os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(_hermes_home))
|
|
|
|
|
|
os.environ.setdefault("MSWEA_SILENT_STARTUP", "1")
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
|
|
|
|
|
# Import our tool system
|
|
|
|
|
|
from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements
|
2026-02-23 02:11:33 -08:00
|
|
|
|
from tools.terminal_tool import cleanup_vm
|
|
|
|
|
|
from tools.interrupt import set_interrupt as _set_interrupt
|
2026-01-29 06:10:24 +00:00
|
|
|
|
from tools.browser_tool import cleanup_browser
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
2026-02-01 18:01:31 -08:00
|
|
|
|
import requests
|
|
|
|
|
|
|
2026-02-20 23:23:32 -08:00
|
|
|
|
from hermes_constants import OPENROUTER_BASE_URL, OPENROUTER_MODELS_URL
|
|
|
|
|
|
|
2026-02-21 22:31:43 -08:00
|
|
|
|
# Agent internals extracted to agent/ package for modularity
|
2026-02-22 02:31:52 -08:00
|
|
|
|
from agent.prompt_builder import (
|
|
|
|
|
|
DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
|
2026-02-22 13:28:13 -08:00
|
|
|
|
MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
|
2026-02-22 02:31:52 -08:00
|
|
|
|
)
|
2026-02-21 22:31:43 -08:00
|
|
|
|
from agent.model_metadata import (
|
|
|
|
|
|
fetch_model_metadata, get_model_context_length,
|
|
|
|
|
|
estimate_tokens_rough, estimate_messages_tokens_rough,
|
2026-03-05 16:09:57 -08:00
|
|
|
|
get_next_probe_tier, parse_context_limit_from_error,
|
|
|
|
|
|
save_context_length,
|
2026-02-21 22:31:43 -08:00
|
|
|
|
)
|
|
|
|
|
|
from agent.context_compressor import ContextCompressor
|
|
|
|
|
|
from agent.prompt_caching import apply_anthropic_cache_control
|
|
|
|
|
|
from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt
|
|
|
|
|
|
from agent.display import (
|
|
|
|
|
|
KawaiiSpinner, build_tool_preview as _build_tool_preview,
|
|
|
|
|
|
get_cute_tool_message as _get_cute_tool_message_impl,
|
2026-02-28 22:49:58 -08:00
|
|
|
|
_detect_tool_failure,
|
2026-02-21 22:31:43 -08:00
|
|
|
|
)
|
|
|
|
|
|
from agent.trajectory import (
|
|
|
|
|
|
convert_scratchpad_to_think, has_incomplete_scratchpad,
|
|
|
|
|
|
save_trajectory as _save_trajectory_to_file,
|
2026-02-12 16:11:16 -08:00
|
|
|
|
)
|
|
|
|
|
|
|
2026-01-31 06:30:48 +00:00
|
|
|
|
|
2026-03-07 08:16:37 -08:00
|
|
|
|
class IterationBudget:
|
|
|
|
|
|
"""Thread-safe shared iteration counter for parent and child agents.
|
|
|
|
|
|
|
|
|
|
|
|
Tracks total LLM-call iterations consumed across a parent agent and all
|
|
|
|
|
|
its subagents. A single ``IterationBudget`` is created by the parent
|
|
|
|
|
|
and passed to every child so they share the same cap.
|
|
|
|
|
|
|
|
|
|
|
|
``execute_code`` (programmatic tool calling) iterations are refunded via
|
|
|
|
|
|
:meth:`refund` so they don't eat into the budget.
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, max_total: int):
|
|
|
|
|
|
self.max_total = max_total
|
|
|
|
|
|
self._used = 0
|
|
|
|
|
|
self._lock = threading.Lock()
|
|
|
|
|
|
|
|
|
|
|
|
def consume(self) -> bool:
|
|
|
|
|
|
"""Try to consume one iteration. Returns True if allowed."""
|
|
|
|
|
|
with self._lock:
|
|
|
|
|
|
if self._used >= self.max_total:
|
|
|
|
|
|
return False
|
|
|
|
|
|
self._used += 1
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
def refund(self) -> None:
|
|
|
|
|
|
"""Give back one iteration (e.g. for execute_code turns)."""
|
|
|
|
|
|
with self._lock:
|
|
|
|
|
|
if self._used > 0:
|
|
|
|
|
|
self._used -= 1
|
|
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
|
def used(self) -> int:
|
|
|
|
|
|
return self._used
|
|
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
|
def remaining(self) -> int:
|
|
|
|
|
|
with self._lock:
|
|
|
|
|
|
return max(0, self.max_total - self._used)
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
class AIAgent:
|
|
|
|
|
|
"""
|
|
|
|
|
|
AI Agent with tool calling capabilities.
|
|
|
|
|
|
|
|
|
|
|
|
This class manages the conversation flow, tool execution, and response handling
|
|
|
|
|
|
for AI models that support function calling.
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(
|
2025-11-03 17:42:23 -05:00
|
|
|
|
self,
|
|
|
|
|
|
base_url: str = None,
|
|
|
|
|
|
api_key: str = None,
|
2026-02-25 18:20:38 -08:00
|
|
|
|
provider: str = None,
|
|
|
|
|
|
api_mode: str = None,
|
2026-02-08 10:49:24 +00:00
|
|
|
|
model: str = "anthropic/claude-opus-4.6", # OpenRouter format
|
2026-03-07 08:16:37 -08:00
|
|
|
|
max_iterations: int = 90, # Default tool-calling iterations (shared with subagents)
|
2025-09-10 00:43:55 -07:00
|
|
|
|
tool_delay: float = 1.0,
|
|
|
|
|
|
enabled_toolsets: List[str] = None,
|
|
|
|
|
|
disabled_toolsets: List[str] = None,
|
2025-09-10 00:51:41 -07:00
|
|
|
|
save_trajectories: bool = False,
|
2025-10-08 02:33:58 +00:00
|
|
|
|
verbose_logging: bool = False,
|
2026-01-31 06:30:48 +00:00
|
|
|
|
quiet_mode: bool = False,
|
2025-11-07 14:43:44 -05:00
|
|
|
|
ephemeral_system_prompt: str = None,
|
|
|
|
|
|
log_prefix_chars: int = 100,
|
2026-01-08 08:57:51 +00:00
|
|
|
|
log_prefix: str = "",
|
|
|
|
|
|
providers_allowed: List[str] = None,
|
|
|
|
|
|
providers_ignored: List[str] = None,
|
|
|
|
|
|
providers_order: List[str] = None,
|
|
|
|
|
|
provider_sort: str = None,
|
2026-03-01 18:24:27 -08:00
|
|
|
|
provider_require_parameters: bool = False,
|
|
|
|
|
|
provider_data_collection: str = None,
|
2026-02-01 15:36:26 -08:00
|
|
|
|
session_id: str = None,
|
2026-02-03 14:54:43 -08:00
|
|
|
|
tool_progress_callback: callable = None,
|
2026-03-09 23:26:43 -07:00
|
|
|
|
thinking_callback: callable = None,
|
2026-02-19 20:06:14 -08:00
|
|
|
|
clarify_callback: callable = None,
|
2026-02-28 17:09:26 -08:00
|
|
|
|
step_callback: callable = None,
|
2026-02-08 10:49:24 +00:00
|
|
|
|
max_tokens: int = None,
|
|
|
|
|
|
reasoning_config: Dict[str, Any] = None,
|
|
|
|
|
|
prefill_messages: List[Dict[str, Any]] = None,
|
2026-02-12 16:11:16 -08:00
|
|
|
|
platform: str = None,
|
2026-02-16 22:40:31 -08:00
|
|
|
|
skip_context_files: bool = False,
|
2026-02-19 00:57:31 -08:00
|
|
|
|
skip_memory: bool = False,
|
|
|
|
|
|
session_db=None,
|
2026-02-25 19:34:25 -05:00
|
|
|
|
honcho_session_key: str = None,
|
2026-03-07 08:16:37 -08:00
|
|
|
|
iteration_budget: "IterationBudget" = None,
|
feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
2026-03-08 20:22:33 -07:00
|
|
|
|
fallback_model: Dict[str, Any] = None,
|
feat: filesystem checkpoints and /rollback command
Automatic filesystem snapshots before destructive file operations,
with user-facing rollback. Inspired by PR #559 (by @alireza78a).
Architecture:
- Shadow git repos at ~/.hermes/checkpoints/{hash}/ via GIT_DIR
- CheckpointManager: take/list/restore, turn-scoped dedup, pruning
- Transparent — the LLM never sees it, no tool schema, no tokens
- Once per turn — only first write_file/patch triggers a snapshot
Integration:
- Config: checkpoints.enabled + checkpoints.max_snapshots
- CLI flag: hermes --checkpoints
- Trigger: run_agent.py _execute_tool_calls() before write_file/patch
- /rollback slash command in CLI + gateway (list, restore by number)
- Pre-rollback snapshot auto-created on restore (undo the undo)
Safety:
- Never blocks file operations — all errors silently logged
- Skips root dir, home dir, dirs >50K files
- Disables gracefully when git not installed
- Shadow repo completely isolated from project git
Tests: 35 new tests, all passing (2798 total suite)
Docs: feature page, config reference, CLI commands reference
2026-03-10 00:49:15 -07:00
|
|
|
|
checkpoints_enabled: bool = False,
|
|
|
|
|
|
checkpoint_max_snapshots: int = 50,
|
2025-09-10 00:43:55 -07:00
|
|
|
|
):
|
|
|
|
|
|
"""
|
|
|
|
|
|
Initialize the AI Agent.
|
2025-11-03 17:42:23 -05:00
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
Args:
|
|
|
|
|
|
base_url (str): Base URL for the model API (optional)
|
|
|
|
|
|
api_key (str): API key for authentication (optional, uses env var if not provided)
|
2026-02-25 18:20:38 -08:00
|
|
|
|
provider (str): Provider identifier (optional; used for telemetry/routing hints)
|
|
|
|
|
|
api_mode (str): API mode override: "chat_completions" or "codex_responses"
|
2026-02-20 23:23:32 -08:00
|
|
|
|
model (str): Model name to use (default: "anthropic/claude-opus-4.6")
|
2026-03-07 08:16:37 -08:00
|
|
|
|
max_iterations (int): Maximum number of tool calling iterations (default: 90)
|
2025-09-10 00:43:55 -07:00
|
|
|
|
tool_delay (float): Delay between tool calls in seconds (default: 1.0)
|
|
|
|
|
|
enabled_toolsets (List[str]): Only enable tools from these toolsets (optional)
|
|
|
|
|
|
disabled_toolsets (List[str]): Disable tools from these toolsets (optional)
|
|
|
|
|
|
save_trajectories (bool): Whether to save conversation trajectories to JSONL files (default: False)
|
2025-09-10 00:51:41 -07:00
|
|
|
|
verbose_logging (bool): Enable verbose logging for debugging (default: False)
|
2026-01-31 06:30:48 +00:00
|
|
|
|
quiet_mode (bool): Suppress progress output for clean CLI experience (default: False)
|
2025-10-08 02:33:58 +00:00
|
|
|
|
ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
|
2026-02-20 23:23:32 -08:00
|
|
|
|
log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses (default: 100)
|
2026-01-08 08:57:51 +00:00
|
|
|
|
log_prefix (str): Prefix to add to all log messages for identification in parallel processing (default: "")
|
|
|
|
|
|
providers_allowed (List[str]): OpenRouter providers to allow (optional)
|
|
|
|
|
|
providers_ignored (List[str]): OpenRouter providers to ignore (optional)
|
|
|
|
|
|
providers_order (List[str]): OpenRouter providers to try in order (optional)
|
|
|
|
|
|
provider_sort (str): Sort providers by price/throughput/latency (optional)
|
2026-02-01 15:36:26 -08:00
|
|
|
|
session_id (str): Pre-generated session ID for logging (optional, auto-generated if not provided)
|
2026-02-03 14:54:43 -08:00
|
|
|
|
tool_progress_callback (callable): Callback function(tool_name, args_preview) for progress notifications
|
2026-02-19 20:06:14 -08:00
|
|
|
|
clarify_callback (callable): Callback function(question, choices) -> str for interactive user questions.
|
|
|
|
|
|
Provided by the platform layer (CLI or gateway). If None, the clarify tool returns an error.
|
2026-02-08 10:49:24 +00:00
|
|
|
|
max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
|
|
|
|
|
|
reasoning_config (Dict): OpenRouter reasoning configuration override (e.g. {"effort": "none"} to disable thinking).
|
2026-03-07 10:14:19 -08:00
|
|
|
|
If None, defaults to {"enabled": True, "effort": "medium"} for OpenRouter. Set to disable/customize reasoning.
|
2026-02-08 10:49:24 +00:00
|
|
|
|
prefill_messages (List[Dict]): Messages to prepend to conversation history as prefilled context.
|
|
|
|
|
|
Useful for injecting a few-shot example or priming the model's response style.
|
|
|
|
|
|
Example: [{"role": "user", "content": "Hi!"}, {"role": "assistant", "content": "Hello!"}]
|
2026-02-12 16:11:16 -08:00
|
|
|
|
platform (str): The interface platform the user is on (e.g. "cli", "telegram", "discord", "whatsapp").
|
|
|
|
|
|
Used to inject platform-specific formatting hints into the system prompt.
|
2026-02-16 22:40:31 -08:00
|
|
|
|
skip_context_files (bool): If True, skip auto-injection of SOUL.md, AGENTS.md, and .cursorrules
|
|
|
|
|
|
into the system prompt. Use this for batch processing and data generation to avoid
|
|
|
|
|
|
polluting trajectories with user-specific persona or project instructions.
|
2026-02-25 19:34:25 -05:00
|
|
|
|
honcho_session_key (str): Session key for Honcho integration (e.g., "telegram:123456" or CLI session_id).
|
|
|
|
|
|
When provided and Honcho is enabled in config, enables persistent cross-session user modeling.
|
2025-09-10 00:43:55 -07:00
|
|
|
|
"""
|
|
|
|
|
|
self.model = model
|
|
|
|
|
|
self.max_iterations = max_iterations
|
2026-03-07 08:16:37 -08:00
|
|
|
|
# Shared iteration budget — parent creates, children inherit.
|
|
|
|
|
|
# Consumed by every LLM turn across parent + all subagents.
|
|
|
|
|
|
self.iteration_budget = iteration_budget or IterationBudget(max_iterations)
|
2025-09-10 00:43:55 -07:00
|
|
|
|
self.tool_delay = tool_delay
|
|
|
|
|
|
self.save_trajectories = save_trajectories
|
2025-09-10 00:51:41 -07:00
|
|
|
|
self.verbose_logging = verbose_logging
|
2026-01-31 06:30:48 +00:00
|
|
|
|
self.quiet_mode = quiet_mode
|
2025-10-08 02:33:58 +00:00
|
|
|
|
self.ephemeral_system_prompt = ephemeral_system_prompt
|
2026-02-12 16:11:16 -08:00
|
|
|
|
self.platform = platform # "cli", "telegram", "discord", "whatsapp", etc.
|
2026-02-16 22:40:31 -08:00
|
|
|
|
self.skip_context_files = skip_context_files
|
2025-11-07 14:43:44 -05:00
|
|
|
|
self.log_prefix_chars = log_prefix_chars
|
2026-01-08 08:57:51 +00:00
|
|
|
|
self.log_prefix = f"{log_prefix} " if log_prefix else ""
|
2026-02-10 06:49:41 +00:00
|
|
|
|
# Store effective base URL for feature detection (prompt caching, reasoning, etc.)
|
|
|
|
|
|
# When no base_url is provided, the client defaults to OpenRouter, so reflect that here.
|
2026-02-20 23:23:32 -08:00
|
|
|
|
self.base_url = base_url or OPENROUTER_BASE_URL
|
2026-02-25 18:20:38 -08:00
|
|
|
|
provider_name = provider.strip().lower() if isinstance(provider, str) and provider.strip() else None
|
|
|
|
|
|
self.provider = provider_name or "openrouter"
|
|
|
|
|
|
if api_mode in {"chat_completions", "codex_responses"}:
|
|
|
|
|
|
self.api_mode = api_mode
|
|
|
|
|
|
elif self.provider == "openai-codex":
|
|
|
|
|
|
self.api_mode = "codex_responses"
|
|
|
|
|
|
elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self.base_url.lower():
|
|
|
|
|
|
self.api_mode = "codex_responses"
|
|
|
|
|
|
self.provider = "openai-codex"
|
|
|
|
|
|
else:
|
|
|
|
|
|
self.api_mode = "chat_completions"
|
2026-03-07 23:36:35 -08:00
|
|
|
|
|
2026-02-03 14:54:43 -08:00
|
|
|
|
self.tool_progress_callback = tool_progress_callback
|
2026-03-09 23:26:43 -07:00
|
|
|
|
self.thinking_callback = thinking_callback
|
2026-02-19 20:06:14 -08:00
|
|
|
|
self.clarify_callback = clarify_callback
|
2026-02-28 17:09:26 -08:00
|
|
|
|
self.step_callback = step_callback
|
2026-02-03 14:54:43 -08:00
|
|
|
|
self._last_reported_tool = None # Track for "new tool" mode
|
2026-01-08 08:57:51 +00:00
|
|
|
|
|
2026-02-03 16:15:49 -08:00
|
|
|
|
# Interrupt mechanism for breaking out of tool loops
|
|
|
|
|
|
self._interrupt_requested = False
|
|
|
|
|
|
self._interrupt_message = None # Optional message that triggered interrupt
|
|
|
|
|
|
|
2026-02-20 03:15:53 -08:00
|
|
|
|
# Subagent delegation state
|
|
|
|
|
|
self._delegate_depth = 0 # 0 = top-level agent, incremented for children
|
|
|
|
|
|
self._active_children = [] # Running child AIAgents (for interrupt propagation)
|
|
|
|
|
|
|
2026-01-08 08:57:51 +00:00
|
|
|
|
# Store OpenRouter provider preferences
|
|
|
|
|
|
self.providers_allowed = providers_allowed
|
|
|
|
|
|
self.providers_ignored = providers_ignored
|
|
|
|
|
|
self.providers_order = providers_order
|
|
|
|
|
|
self.provider_sort = provider_sort
|
2026-03-01 18:24:27 -08:00
|
|
|
|
self.provider_require_parameters = provider_require_parameters
|
|
|
|
|
|
self.provider_data_collection = provider_data_collection
|
2025-11-03 17:42:23 -05:00
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
# Store toolset filtering options
|
|
|
|
|
|
self.enabled_toolsets = enabled_toolsets
|
|
|
|
|
|
self.disabled_toolsets = disabled_toolsets
|
|
|
|
|
|
|
2026-02-08 10:49:24 +00:00
|
|
|
|
# Model response configuration
|
|
|
|
|
|
self.max_tokens = max_tokens # None = use model default
|
2026-03-07 10:14:19 -08:00
|
|
|
|
self.reasoning_config = reasoning_config # None = use default (medium for OpenRouter)
|
2026-02-08 10:49:24 +00:00
|
|
|
|
self.prefill_messages = prefill_messages or [] # Prefilled conversation turns
|
|
|
|
|
|
|
2026-02-10 06:49:41 +00:00
|
|
|
|
# Anthropic prompt caching: auto-enabled for Claude models via OpenRouter.
|
|
|
|
|
|
# Reduces input costs by ~75% on multi-turn conversations by caching the
|
|
|
|
|
|
# conversation prefix. Uses system_and_3 strategy (4 breakpoints).
|
|
|
|
|
|
is_openrouter = "openrouter" in self.base_url.lower()
|
|
|
|
|
|
is_claude = "claude" in self.model.lower()
|
|
|
|
|
|
self._use_prompt_caching = is_openrouter and is_claude
|
|
|
|
|
|
self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost)
|
|
|
|
|
|
|
2026-02-28 22:49:58 -08:00
|
|
|
|
# Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log
|
|
|
|
|
|
# so tool failures, API errors, etc. are inspectable after the fact.
|
2026-02-28 21:56:05 -08:00
|
|
|
|
from agent.redact import RedactingFormatter
|
2026-02-28 22:49:58 -08:00
|
|
|
|
_error_log_dir = Path.home() / ".hermes" / "logs"
|
|
|
|
|
|
_error_log_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
_error_log_path = _error_log_dir / "errors.log"
|
|
|
|
|
|
from logging.handlers import RotatingFileHandler
|
|
|
|
|
|
_error_file_handler = RotatingFileHandler(
|
|
|
|
|
|
_error_log_path, maxBytes=2 * 1024 * 1024, backupCount=2,
|
|
|
|
|
|
)
|
|
|
|
|
|
_error_file_handler.setLevel(logging.WARNING)
|
|
|
|
|
|
_error_file_handler.setFormatter(RedactingFormatter(
|
|
|
|
|
|
'%(asctime)s %(levelname)s %(name)s: %(message)s',
|
|
|
|
|
|
))
|
|
|
|
|
|
logging.getLogger().addHandler(_error_file_handler)
|
|
|
|
|
|
|
2025-09-10 00:51:41 -07:00
|
|
|
|
if self.verbose_logging:
|
|
|
|
|
|
logging.basicConfig(
|
|
|
|
|
|
level=logging.DEBUG,
|
|
|
|
|
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
|
|
|
|
|
datefmt='%H:%M:%S'
|
|
|
|
|
|
)
|
2026-02-28 21:56:05 -08:00
|
|
|
|
for handler in logging.getLogger().handlers:
|
|
|
|
|
|
handler.setFormatter(RedactingFormatter(
|
|
|
|
|
|
'%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
|
|
|
|
|
datefmt='%H:%M:%S',
|
|
|
|
|
|
))
|
2026-01-29 19:59:59 +00:00
|
|
|
|
# Keep third-party libraries at WARNING level to reduce noise
|
2026-01-08 08:57:51 +00:00
|
|
|
|
# We have our own retry and error logging that's more informative
|
|
|
|
|
|
logging.getLogger('openai').setLevel(logging.WARNING)
|
|
|
|
|
|
logging.getLogger('openai._base_client').setLevel(logging.WARNING)
|
2025-10-15 18:07:06 +00:00
|
|
|
|
logging.getLogger('httpx').setLevel(logging.WARNING)
|
2026-01-08 08:57:51 +00:00
|
|
|
|
logging.getLogger('httpcore').setLevel(logging.WARNING)
|
2026-01-29 19:59:59 +00:00
|
|
|
|
logging.getLogger('asyncio').setLevel(logging.WARNING)
|
|
|
|
|
|
# Suppress Modal/gRPC related debug spam
|
|
|
|
|
|
logging.getLogger('hpack').setLevel(logging.WARNING)
|
|
|
|
|
|
logging.getLogger('hpack.hpack').setLevel(logging.WARNING)
|
|
|
|
|
|
logging.getLogger('grpc').setLevel(logging.WARNING)
|
|
|
|
|
|
logging.getLogger('modal').setLevel(logging.WARNING)
|
|
|
|
|
|
logging.getLogger('rex-deploy').setLevel(logging.INFO) # Keep INFO for sandbox status
|
2026-02-21 03:11:11 -08:00
|
|
|
|
logger.info("Verbose logging enabled (third-party library logs suppressed)")
|
2025-09-10 00:51:41 -07:00
|
|
|
|
else:
|
|
|
|
|
|
# Set logging to INFO level for important messages only
|
|
|
|
|
|
logging.basicConfig(
|
|
|
|
|
|
level=logging.INFO,
|
|
|
|
|
|
format='%(asctime)s - %(levelname)s - %(message)s',
|
|
|
|
|
|
datefmt='%H:%M:%S'
|
|
|
|
|
|
)
|
2026-01-08 08:57:51 +00:00
|
|
|
|
# Suppress noisy library logging
|
|
|
|
|
|
logging.getLogger('openai').setLevel(logging.ERROR)
|
|
|
|
|
|
logging.getLogger('openai._base_client').setLevel(logging.ERROR)
|
|
|
|
|
|
logging.getLogger('httpx').setLevel(logging.ERROR)
|
|
|
|
|
|
logging.getLogger('httpcore').setLevel(logging.ERROR)
|
2026-02-21 12:41:05 -08:00
|
|
|
|
if self.quiet_mode:
|
|
|
|
|
|
# In quiet mode (CLI default), suppress all tool/infra log
|
|
|
|
|
|
# noise. The TUI has its own rich display for status; logger
|
|
|
|
|
|
# INFO/WARNING messages just clutter it.
|
|
|
|
|
|
for quiet_logger in [
|
|
|
|
|
|
'tools', # all tools.* (terminal, browser, web, file, etc.)
|
|
|
|
|
|
'minisweagent', # mini-swe-agent execution backend
|
|
|
|
|
|
'run_agent', # agent runner internals
|
|
|
|
|
|
'trajectory_compressor',
|
|
|
|
|
|
'cron', # scheduler (only relevant in daemon mode)
|
|
|
|
|
|
'hermes_cli', # CLI helpers
|
|
|
|
|
|
]:
|
|
|
|
|
|
logging.getLogger(quiet_logger).setLevel(logging.ERROR)
|
2025-09-10 00:51:41 -07:00
|
|
|
|
|
2026-01-23 12:26:53 +00:00
|
|
|
|
# Initialize OpenAI client - defaults to OpenRouter
|
2025-09-10 00:43:55 -07:00
|
|
|
|
client_kwargs = {}
|
2026-01-23 12:26:53 +00:00
|
|
|
|
|
|
|
|
|
|
# Default to OpenRouter if no base_url provided
|
2025-09-10 00:43:55 -07:00
|
|
|
|
if base_url:
|
|
|
|
|
|
client_kwargs["base_url"] = base_url
|
2026-01-23 12:26:53 +00:00
|
|
|
|
else:
|
2026-02-20 23:23:32 -08:00
|
|
|
|
client_kwargs["base_url"] = OPENROUTER_BASE_URL
|
2026-01-08 08:57:51 +00:00
|
|
|
|
|
2026-01-23 12:26:53 +00:00
|
|
|
|
# Handle API key - OpenRouter is the primary provider
|
2025-09-10 00:43:55 -07:00
|
|
|
|
if api_key:
|
|
|
|
|
|
client_kwargs["api_key"] = api_key
|
|
|
|
|
|
else:
|
2026-01-23 12:26:53 +00:00
|
|
|
|
# Primary: OPENROUTER_API_KEY, fallback to direct provider keys
|
2026-01-29 06:10:24 +00:00
|
|
|
|
client_kwargs["api_key"] = os.getenv("OPENROUTER_API_KEY", "")
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
2026-02-25 16:34:47 -08:00
|
|
|
|
# OpenRouter app attribution — shows hermes-agent in rankings/analytics
|
|
|
|
|
|
effective_base = client_kwargs.get("base_url", "")
|
|
|
|
|
|
if "openrouter" in effective_base.lower():
|
|
|
|
|
|
client_kwargs["default_headers"] = {
|
|
|
|
|
|
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
|
|
|
|
|
"X-OpenRouter-Title": "Hermes Agent",
|
2026-02-28 10:38:49 -08:00
|
|
|
|
"X-OpenRouter-Categories": "productivity,cli-agent",
|
2026-02-25 16:34:47 -08:00
|
|
|
|
}
|
2026-03-07 20:43:34 -05:00
|
|
|
|
elif "api.kimi.com" in effective_base.lower():
|
|
|
|
|
|
# Kimi Code API requires a recognized coding-agent User-Agent
|
|
|
|
|
|
# (see https://github.com/MoonshotAI/kimi-cli)
|
|
|
|
|
|
client_kwargs["default_headers"] = {
|
|
|
|
|
|
"User-Agent": "KimiCLI/1.0",
|
|
|
|
|
|
}
|
2026-02-25 16:34:47 -08:00
|
|
|
|
|
2026-02-23 02:11:33 -08:00
|
|
|
|
self._client_kwargs = client_kwargs # stored for rebuilding after interrupt
|
2025-09-10 00:43:55 -07:00
|
|
|
|
try:
|
|
|
|
|
|
self.client = OpenAI(**client_kwargs)
|
2026-01-31 06:30:48 +00:00
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
print(f"🤖 AI Agent initialized with model: {self.model}")
|
|
|
|
|
|
if base_url:
|
|
|
|
|
|
print(f"🔗 Using custom base URL: {base_url}")
|
|
|
|
|
|
# Always show API key info (masked) for debugging auth issues
|
|
|
|
|
|
key_used = client_kwargs.get("api_key", "none")
|
|
|
|
|
|
if key_used and key_used != "dummy-key" and len(key_used) > 12:
|
|
|
|
|
|
print(f"🔑 Using API key: {key_used[:8]}...{key_used[-4:]}")
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(f"⚠️ Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')")
|
2025-09-10 00:43:55 -07:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
|
|
|
|
|
|
|
feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
2026-03-08 20:22:33 -07:00
|
|
|
|
# Provider fallback — a single backup model/provider tried when the
|
|
|
|
|
|
# primary is exhausted (rate-limit, overload, connection failure).
|
|
|
|
|
|
# Config shape: {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}
|
|
|
|
|
|
self._fallback_model = fallback_model if isinstance(fallback_model, dict) else None
|
|
|
|
|
|
self._fallback_activated = False
|
|
|
|
|
|
if self._fallback_model:
|
|
|
|
|
|
fb_p = self._fallback_model.get("provider", "")
|
|
|
|
|
|
fb_m = self._fallback_model.get("model", "")
|
|
|
|
|
|
if fb_p and fb_m and not self.quiet_mode:
|
|
|
|
|
|
print(f"🔄 Fallback model: {fb_m} ({fb_p})")
|
|
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
# Get available tools with filtering
|
|
|
|
|
|
self.tools = get_tool_definitions(
|
|
|
|
|
|
enabled_toolsets=enabled_toolsets,
|
2026-01-31 06:30:48 +00:00
|
|
|
|
disabled_toolsets=disabled_toolsets,
|
|
|
|
|
|
quiet_mode=self.quiet_mode,
|
2025-09-10 00:43:55 -07:00
|
|
|
|
)
|
|
|
|
|
|
|
2026-01-10 05:56:26 +00:00
|
|
|
|
# Show tool configuration and store valid tool names for validation
|
|
|
|
|
|
self.valid_tool_names = set()
|
2025-09-10 00:43:55 -07:00
|
|
|
|
if self.tools:
|
2026-01-10 05:56:26 +00:00
|
|
|
|
self.valid_tool_names = {tool["function"]["name"] for tool in self.tools}
|
|
|
|
|
|
tool_names = sorted(self.valid_tool_names)
|
2026-01-31 06:30:48 +00:00
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
print(f"🛠️ Loaded {len(self.tools)} tools: {', '.join(tool_names)}")
|
|
|
|
|
|
|
|
|
|
|
|
# Show filtering info if applied
|
|
|
|
|
|
if enabled_toolsets:
|
|
|
|
|
|
print(f" ✅ Enabled toolsets: {', '.join(enabled_toolsets)}")
|
|
|
|
|
|
if disabled_toolsets:
|
|
|
|
|
|
print(f" ❌ Disabled toolsets: {', '.join(disabled_toolsets)}")
|
|
|
|
|
|
elif not self.quiet_mode:
|
2025-09-10 00:43:55 -07:00
|
|
|
|
print("🛠️ No tools loaded (all tools filtered out or unavailable)")
|
|
|
|
|
|
|
|
|
|
|
|
# Check tool requirements
|
2026-01-31 06:30:48 +00:00
|
|
|
|
if self.tools and not self.quiet_mode:
|
2025-09-10 00:43:55 -07:00
|
|
|
|
requirements = check_toolset_requirements()
|
|
|
|
|
|
missing_reqs = [name for name, available in requirements.items() if not available]
|
|
|
|
|
|
if missing_reqs:
|
|
|
|
|
|
print(f"⚠️ Some tools may not work due to missing requirements: {missing_reqs}")
|
|
|
|
|
|
|
|
|
|
|
|
# Show trajectory saving status
|
2026-01-31 06:30:48 +00:00
|
|
|
|
if self.save_trajectories and not self.quiet_mode:
|
2025-09-10 00:43:55 -07:00
|
|
|
|
print("📝 Trajectory saving enabled")
|
2025-10-08 02:33:58 +00:00
|
|
|
|
|
|
|
|
|
|
# Show ephemeral system prompt status
|
2026-01-31 06:30:48 +00:00
|
|
|
|
if self.ephemeral_system_prompt and not self.quiet_mode:
|
2025-10-08 02:33:58 +00:00
|
|
|
|
prompt_preview = self.ephemeral_system_prompt[:60] + "..." if len(self.ephemeral_system_prompt) > 60 else self.ephemeral_system_prompt
|
|
|
|
|
|
print(f"🔒 Ephemeral system prompt: '{prompt_preview}' (not saved to trajectories)")
|
2026-02-01 15:36:26 -08:00
|
|
|
|
|
2026-02-10 06:49:41 +00:00
|
|
|
|
# Show prompt caching status
|
|
|
|
|
|
if self._use_prompt_caching and not self.quiet_mode:
|
|
|
|
|
|
print(f"💾 Prompt caching: ENABLED (Claude via OpenRouter, {self._cache_ttl} TTL)")
|
|
|
|
|
|
|
2026-02-01 15:36:26 -08:00
|
|
|
|
# Session logging setup - auto-save conversation trajectories for debugging
|
|
|
|
|
|
self.session_start = datetime.now()
|
|
|
|
|
|
if session_id:
|
|
|
|
|
|
# Use provided session ID (e.g., from CLI)
|
|
|
|
|
|
self.session_id = session_id
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Generate a new session ID
|
|
|
|
|
|
timestamp_str = self.session_start.strftime("%Y%m%d_%H%M%S")
|
|
|
|
|
|
short_uuid = uuid.uuid4().hex[:6]
|
|
|
|
|
|
self.session_id = f"{timestamp_str}_{short_uuid}"
|
|
|
|
|
|
|
2026-02-21 01:20:18 -08:00
|
|
|
|
# Session logs go into ~/.hermes/sessions/ alongside gateway sessions
|
|
|
|
|
|
hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
|
|
|
|
|
|
self.logs_dir = hermes_home / "sessions"
|
|
|
|
|
|
self.logs_dir.mkdir(parents=True, exist_ok=True)
|
2026-02-01 15:36:26 -08:00
|
|
|
|
self.session_log_file = self.logs_dir / f"session_{self.session_id}.json"
|
|
|
|
|
|
|
|
|
|
|
|
# Track conversation messages for session logging
|
|
|
|
|
|
self._session_messages: List[Dict[str, Any]] = []
|
2026-02-01 18:01:31 -08:00
|
|
|
|
|
2026-02-19 00:57:31 -08:00
|
|
|
|
# Cached system prompt -- built once per session, only rebuilt on compression
|
|
|
|
|
|
self._cached_system_prompt: Optional[str] = None
|
|
|
|
|
|
|
feat: filesystem checkpoints and /rollback command
Automatic filesystem snapshots before destructive file operations,
with user-facing rollback. Inspired by PR #559 (by @alireza78a).
Architecture:
- Shadow git repos at ~/.hermes/checkpoints/{hash}/ via GIT_DIR
- CheckpointManager: take/list/restore, turn-scoped dedup, pruning
- Transparent — the LLM never sees it, no tool schema, no tokens
- Once per turn — only first write_file/patch triggers a snapshot
Integration:
- Config: checkpoints.enabled + checkpoints.max_snapshots
- CLI flag: hermes --checkpoints
- Trigger: run_agent.py _execute_tool_calls() before write_file/patch
- /rollback slash command in CLI + gateway (list, restore by number)
- Pre-rollback snapshot auto-created on restore (undo the undo)
Safety:
- Never blocks file operations — all errors silently logged
- Skips root dir, home dir, dirs >50K files
- Disables gracefully when git not installed
- Shadow repo completely isolated from project git
Tests: 35 new tests, all passing (2798 total suite)
Docs: feature page, config reference, CLI commands reference
2026-03-10 00:49:15 -07:00
|
|
|
|
# Filesystem checkpoint manager (transparent — not a tool)
|
|
|
|
|
|
from tools.checkpoint_manager import CheckpointManager
|
|
|
|
|
|
self._checkpoint_mgr = CheckpointManager(
|
|
|
|
|
|
enabled=checkpoints_enabled,
|
|
|
|
|
|
max_snapshots=checkpoint_max_snapshots,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
2026-02-19 00:57:31 -08:00
|
|
|
|
# SQLite session store (optional -- provided by CLI or gateway)
|
|
|
|
|
|
self._session_db = session_db
|
|
|
|
|
|
if self._session_db:
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._session_db.create_session(
|
|
|
|
|
|
session_id=self.session_id,
|
|
|
|
|
|
source=self.platform or "cli",
|
|
|
|
|
|
model=self.model,
|
|
|
|
|
|
model_config={
|
|
|
|
|
|
"max_iterations": self.max_iterations,
|
|
|
|
|
|
"reasoning_config": reasoning_config,
|
|
|
|
|
|
"max_tokens": max_tokens,
|
|
|
|
|
|
},
|
|
|
|
|
|
user_id=None,
|
|
|
|
|
|
)
|
2026-02-21 03:32:11 -08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.debug("Session DB create_session failed: %s", e)
|
2026-02-19 00:57:31 -08:00
|
|
|
|
|
2026-02-17 17:02:33 -08:00
|
|
|
|
# In-memory todo list for task planning (one per agent/session)
|
|
|
|
|
|
from tools.todo_tool import TodoStore
|
|
|
|
|
|
self._todo_store = TodoStore()
|
|
|
|
|
|
|
2026-02-19 00:57:31 -08:00
|
|
|
|
# Persistent memory (MEMORY.md + USER.md) -- loaded from disk
|
|
|
|
|
|
self._memory_store = None
|
|
|
|
|
|
self._memory_enabled = False
|
|
|
|
|
|
self._user_profile_enabled = False
|
2026-02-22 10:15:17 -08:00
|
|
|
|
self._memory_nudge_interval = 10
|
|
|
|
|
|
self._memory_flush_min_turns = 6
|
2026-02-19 00:57:31 -08:00
|
|
|
|
if not skip_memory:
|
|
|
|
|
|
try:
|
|
|
|
|
|
from hermes_cli.config import load_config as _load_mem_config
|
|
|
|
|
|
mem_config = _load_mem_config().get("memory", {})
|
|
|
|
|
|
self._memory_enabled = mem_config.get("memory_enabled", False)
|
|
|
|
|
|
self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
|
2026-02-22 10:15:17 -08:00
|
|
|
|
self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10))
|
|
|
|
|
|
self._memory_flush_min_turns = int(mem_config.get("flush_min_turns", 6))
|
2026-02-19 00:57:31 -08:00
|
|
|
|
if self._memory_enabled or self._user_profile_enabled:
|
|
|
|
|
|
from tools.memory_tool import MemoryStore
|
|
|
|
|
|
self._memory_store = MemoryStore(
|
|
|
|
|
|
memory_char_limit=mem_config.get("memory_char_limit", 2200),
|
|
|
|
|
|
user_char_limit=mem_config.get("user_char_limit", 1375),
|
|
|
|
|
|
)
|
|
|
|
|
|
self._memory_store.load_from_disk()
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass # Memory is optional -- don't break agent init
|
|
|
|
|
|
|
2026-02-25 19:34:25 -05:00
|
|
|
|
# Honcho AI-native memory (cross-session user modeling)
|
|
|
|
|
|
# Reads ~/.honcho/config.json as the single source of truth.
|
|
|
|
|
|
self._honcho = None # HonchoSessionManager | None
|
|
|
|
|
|
self._honcho_session_key = honcho_session_key
|
|
|
|
|
|
if not skip_memory:
|
|
|
|
|
|
try:
|
|
|
|
|
|
from honcho_integration.client import HonchoClientConfig, get_honcho_client
|
|
|
|
|
|
hcfg = HonchoClientConfig.from_global_config()
|
|
|
|
|
|
if hcfg.enabled and hcfg.api_key:
|
|
|
|
|
|
from honcho_integration.session import HonchoSessionManager
|
|
|
|
|
|
client = get_honcho_client(hcfg)
|
|
|
|
|
|
self._honcho = HonchoSessionManager(
|
|
|
|
|
|
honcho=client,
|
|
|
|
|
|
config=hcfg,
|
2026-02-26 18:02:24 -05:00
|
|
|
|
context_tokens=hcfg.context_tokens,
|
2026-02-25 19:34:25 -05:00
|
|
|
|
)
|
|
|
|
|
|
# Resolve session key: explicit arg > global sessions map > fallback
|
|
|
|
|
|
if not self._honcho_session_key:
|
|
|
|
|
|
self._honcho_session_key = (
|
|
|
|
|
|
hcfg.resolve_session_name()
|
|
|
|
|
|
or "hermes-default"
|
|
|
|
|
|
)
|
|
|
|
|
|
# Ensure session exists in Honcho
|
|
|
|
|
|
self._honcho.get_or_create(self._honcho_session_key)
|
|
|
|
|
|
# Inject session context into the honcho tool module
|
|
|
|
|
|
from tools.honcho_tools import set_session_context
|
|
|
|
|
|
set_session_context(self._honcho, self._honcho_session_key)
|
|
|
|
|
|
logger.info(
|
|
|
|
|
|
"Honcho active (session: %s, user: %s, workspace: %s)",
|
|
|
|
|
|
self._honcho_session_key, hcfg.peer_name, hcfg.workspace_id,
|
|
|
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
|
|
|
if not hcfg.enabled:
|
|
|
|
|
|
logger.debug("Honcho disabled in global config")
|
|
|
|
|
|
elif not hcfg.api_key:
|
|
|
|
|
|
logger.debug("Honcho enabled but no API key configured")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.debug("Honcho init failed (non-fatal): %s", e)
|
|
|
|
|
|
self._honcho = None
|
|
|
|
|
|
|
2026-02-22 13:28:13 -08:00
|
|
|
|
# Skills config: nudge interval for skill creation reminders
|
|
|
|
|
|
self._skill_nudge_interval = 15
|
|
|
|
|
|
try:
|
|
|
|
|
|
from hermes_cli.config import load_config as _load_skills_config
|
|
|
|
|
|
skills_config = _load_skills_config().get("skills", {})
|
|
|
|
|
|
self._skill_nudge_interval = int(skills_config.get("creation_nudge_interval", 15))
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
2026-02-01 18:01:31 -08:00
|
|
|
|
# Initialize context compressor for automatic context management
|
|
|
|
|
|
# Compresses conversation when approaching model's context limit
|
2026-02-28 04:46:35 -08:00
|
|
|
|
# Configuration via config.yaml (compression section) or environment variables
|
2026-02-01 18:01:31 -08:00
|
|
|
|
compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85"))
|
|
|
|
|
|
compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes")
|
2026-02-28 04:46:35 -08:00
|
|
|
|
compression_summary_model = os.getenv("CONTEXT_COMPRESSION_MODEL") or None
|
2026-02-01 18:01:31 -08:00
|
|
|
|
|
|
|
|
|
|
self.context_compressor = ContextCompressor(
|
|
|
|
|
|
model=self.model,
|
|
|
|
|
|
threshold_percent=compression_threshold,
|
2026-02-22 02:17:33 -08:00
|
|
|
|
protect_first_n=3,
|
|
|
|
|
|
protect_last_n=4,
|
2026-02-01 18:01:31 -08:00
|
|
|
|
summary_target_tokens=500,
|
2026-02-28 04:46:35 -08:00
|
|
|
|
summary_model_override=compression_summary_model,
|
2026-02-01 18:01:31 -08:00
|
|
|
|
quiet_mode=self.quiet_mode,
|
2026-03-05 16:09:57 -08:00
|
|
|
|
base_url=self.base_url,
|
2026-02-01 18:01:31 -08:00
|
|
|
|
)
|
|
|
|
|
|
self.compression_enabled = compression_enabled
|
2026-02-22 10:15:17 -08:00
|
|
|
|
self._user_turn_count = 0
|
2026-03-01 00:23:19 -08:00
|
|
|
|
|
|
|
|
|
|
# Cumulative token usage for the session
|
|
|
|
|
|
self.session_prompt_tokens = 0
|
|
|
|
|
|
self.session_completion_tokens = 0
|
|
|
|
|
|
self.session_total_tokens = 0
|
|
|
|
|
|
self.session_api_calls = 0
|
2026-02-01 18:01:31 -08:00
|
|
|
|
|
|
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
if compression_enabled:
|
|
|
|
|
|
print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (compress at {int(compression_threshold*100)}% = {self.context_compressor.threshold_tokens:,})")
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)")
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
2026-02-26 20:23:56 -08:00
|
|
|
|
def _max_tokens_param(self, value: int) -> dict:
|
|
|
|
|
|
"""Return the correct max tokens kwarg for the current provider.
|
|
|
|
|
|
|
|
|
|
|
|
OpenAI's newer models (gpt-4o, o-series, gpt-5+) require
|
|
|
|
|
|
'max_completion_tokens'. OpenRouter, local models, and older
|
|
|
|
|
|
OpenAI models use 'max_tokens'.
|
|
|
|
|
|
"""
|
|
|
|
|
|
_is_direct_openai = (
|
|
|
|
|
|
"api.openai.com" in self.base_url.lower()
|
|
|
|
|
|
and "openrouter" not in self.base_url.lower()
|
|
|
|
|
|
)
|
|
|
|
|
|
if _is_direct_openai:
|
|
|
|
|
|
return {"max_completion_tokens": value}
|
|
|
|
|
|
return {"max_tokens": value}
|
|
|
|
|
|
|
2026-01-10 13:04:43 +00:00
|
|
|
|
def _has_content_after_think_block(self, content: str) -> bool:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Check if content has actual text after any <think></think> blocks.
|
|
|
|
|
|
|
|
|
|
|
|
This detects cases where the model only outputs reasoning but no actual
|
|
|
|
|
|
response, which indicates an incomplete generation that should be retried.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
content: The assistant message content to check
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
True if there's meaningful content after think blocks, False otherwise
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not content:
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
# Remove all <think>...</think> blocks (including nested ones, non-greedy)
|
|
|
|
|
|
cleaned = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
|
|
|
|
|
|
|
|
|
|
|
|
# Check if there's any non-whitespace content remaining
|
|
|
|
|
|
return bool(cleaned.strip())
|
|
|
|
|
|
|
2026-02-22 21:33:28 -08:00
|
|
|
|
def _strip_think_blocks(self, content: str) -> str:
|
|
|
|
|
|
"""Remove <think>...</think> blocks from content, returning only visible text."""
|
|
|
|
|
|
if not content:
|
|
|
|
|
|
return ""
|
|
|
|
|
|
return re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
|
2026-02-25 18:11:26 -08:00
|
|
|
|
|
|
|
|
|
|
def _looks_like_codex_intermediate_ack(
|
|
|
|
|
|
self,
|
|
|
|
|
|
user_message: str,
|
|
|
|
|
|
assistant_content: str,
|
|
|
|
|
|
messages: List[Dict[str, Any]],
|
|
|
|
|
|
) -> bool:
|
|
|
|
|
|
"""Detect a planning/ack message that should continue instead of ending the turn."""
|
|
|
|
|
|
if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages):
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
assistant_text = self._strip_think_blocks(assistant_content or "").strip().lower()
|
|
|
|
|
|
if not assistant_text:
|
|
|
|
|
|
return False
|
|
|
|
|
|
if len(assistant_text) > 1200:
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
has_future_ack = bool(
|
|
|
|
|
|
re.search(r"\b(i['’]ll|i will|let me|i can do that|i can help with that)\b", assistant_text)
|
|
|
|
|
|
)
|
|
|
|
|
|
if not has_future_ack:
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
action_markers = (
|
|
|
|
|
|
"look into",
|
2026-02-25 18:56:06 -08:00
|
|
|
|
"look at",
|
2026-02-25 18:11:26 -08:00
|
|
|
|
"inspect",
|
|
|
|
|
|
"scan",
|
|
|
|
|
|
"check",
|
|
|
|
|
|
"analyz",
|
|
|
|
|
|
"review",
|
|
|
|
|
|
"explore",
|
|
|
|
|
|
"read",
|
|
|
|
|
|
"open",
|
|
|
|
|
|
"run",
|
|
|
|
|
|
"test",
|
|
|
|
|
|
"fix",
|
|
|
|
|
|
"debug",
|
|
|
|
|
|
"search",
|
|
|
|
|
|
"find",
|
|
|
|
|
|
"walkthrough",
|
|
|
|
|
|
"report back",
|
|
|
|
|
|
"summarize",
|
|
|
|
|
|
)
|
2026-02-25 18:56:06 -08:00
|
|
|
|
workspace_markers = (
|
|
|
|
|
|
"directory",
|
|
|
|
|
|
"current directory",
|
|
|
|
|
|
"current dir",
|
|
|
|
|
|
"cwd",
|
|
|
|
|
|
"repo",
|
|
|
|
|
|
"repository",
|
|
|
|
|
|
"codebase",
|
|
|
|
|
|
"project",
|
|
|
|
|
|
"folder",
|
|
|
|
|
|
"filesystem",
|
|
|
|
|
|
"file tree",
|
|
|
|
|
|
"files",
|
|
|
|
|
|
"path",
|
|
|
|
|
|
)
|
2026-02-25 18:11:26 -08:00
|
|
|
|
|
|
|
|
|
|
user_text = (user_message or "").strip().lower()
|
2026-02-25 18:56:06 -08:00
|
|
|
|
user_targets_workspace = (
|
|
|
|
|
|
any(marker in user_text for marker in workspace_markers)
|
|
|
|
|
|
or "~/" in user_text
|
|
|
|
|
|
or "/" in user_text
|
|
|
|
|
|
)
|
2026-02-25 18:11:26 -08:00
|
|
|
|
assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
|
2026-02-25 18:56:06 -08:00
|
|
|
|
assistant_targets_workspace = any(
|
|
|
|
|
|
marker in assistant_text for marker in workspace_markers
|
|
|
|
|
|
)
|
|
|
|
|
|
return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action
|
2026-02-22 21:33:28 -08:00
|
|
|
|
|
|
|
|
|
|
|
2026-02-01 22:48:18 -08:00
|
|
|
|
def _extract_reasoning(self, assistant_message) -> Optional[str]:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Extract reasoning/thinking content from an assistant message.
|
|
|
|
|
|
|
|
|
|
|
|
OpenRouter and various providers can return reasoning in multiple formats:
|
|
|
|
|
|
1. message.reasoning - Direct reasoning field (DeepSeek, Qwen, etc.)
|
|
|
|
|
|
2. message.reasoning_content - Alternative field (Moonshot AI, Novita, etc.)
|
|
|
|
|
|
3. message.reasoning_details - Array of {type, summary, ...} objects (OpenRouter unified)
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
assistant_message: The assistant message object from the API response
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
Combined reasoning text, or None if no reasoning found
|
|
|
|
|
|
"""
|
|
|
|
|
|
reasoning_parts = []
|
|
|
|
|
|
|
|
|
|
|
|
# Check direct reasoning field
|
|
|
|
|
|
if hasattr(assistant_message, 'reasoning') and assistant_message.reasoning:
|
|
|
|
|
|
reasoning_parts.append(assistant_message.reasoning)
|
|
|
|
|
|
|
|
|
|
|
|
# Check reasoning_content field (alternative name used by some providers)
|
|
|
|
|
|
if hasattr(assistant_message, 'reasoning_content') and assistant_message.reasoning_content:
|
|
|
|
|
|
# Don't duplicate if same as reasoning
|
|
|
|
|
|
if assistant_message.reasoning_content not in reasoning_parts:
|
|
|
|
|
|
reasoning_parts.append(assistant_message.reasoning_content)
|
|
|
|
|
|
|
|
|
|
|
|
# Check reasoning_details array (OpenRouter unified format)
|
|
|
|
|
|
# Format: [{"type": "reasoning.summary", "summary": "...", ...}, ...]
|
|
|
|
|
|
if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
|
|
|
|
|
|
for detail in assistant_message.reasoning_details:
|
|
|
|
|
|
if isinstance(detail, dict):
|
|
|
|
|
|
# Extract summary from reasoning detail object
|
|
|
|
|
|
summary = detail.get('summary') or detail.get('content') or detail.get('text')
|
|
|
|
|
|
if summary and summary not in reasoning_parts:
|
|
|
|
|
|
reasoning_parts.append(summary)
|
|
|
|
|
|
|
|
|
|
|
|
# Combine all reasoning parts
|
|
|
|
|
|
if reasoning_parts:
|
|
|
|
|
|
return "\n\n".join(reasoning_parts)
|
|
|
|
|
|
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
2026-02-20 23:23:32 -08:00
|
|
|
|
def _cleanup_task_resources(self, task_id: str) -> None:
|
|
|
|
|
|
"""Clean up VM and browser resources for a given task."""
|
|
|
|
|
|
try:
|
|
|
|
|
|
cleanup_vm(task_id)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
if self.verbose_logging:
|
|
|
|
|
|
logging.warning(f"Failed to cleanup VM for task {task_id}: {e}")
|
|
|
|
|
|
try:
|
|
|
|
|
|
cleanup_browser(task_id)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
if self.verbose_logging:
|
|
|
|
|
|
logging.warning(f"Failed to cleanup browser for task {task_id}: {e}")
|
|
|
|
|
|
|
2026-02-22 17:10:24 -08:00
|
|
|
|
def _persist_session(self, messages: List[Dict], conversation_history: List[Dict] = None):
|
|
|
|
|
|
"""Save session state to both JSON log and SQLite on any exit path.
|
|
|
|
|
|
|
|
|
|
|
|
Ensures conversations are never lost, even on errors or early returns.
|
|
|
|
|
|
"""
|
|
|
|
|
|
self._session_messages = messages
|
|
|
|
|
|
self._save_session_log(messages)
|
|
|
|
|
|
self._flush_messages_to_session_db(messages, conversation_history)
|
|
|
|
|
|
|
|
|
|
|
|
def _log_msg_to_db(self, msg: Dict):
|
|
|
|
|
|
"""Log a single message to SQLite immediately. Called after each messages.append()."""
|
|
|
|
|
|
if not self._session_db:
|
|
|
|
|
|
return
|
|
|
|
|
|
try:
|
|
|
|
|
|
role = msg.get("role", "unknown")
|
|
|
|
|
|
content = msg.get("content")
|
|
|
|
|
|
tool_calls_data = None
|
|
|
|
|
|
if hasattr(msg, "tool_calls") and msg.tool_calls:
|
|
|
|
|
|
tool_calls_data = [
|
|
|
|
|
|
{"name": tc.function.name, "arguments": tc.function.arguments}
|
|
|
|
|
|
for tc in msg.tool_calls
|
|
|
|
|
|
]
|
|
|
|
|
|
elif isinstance(msg.get("tool_calls"), list):
|
|
|
|
|
|
tool_calls_data = msg["tool_calls"]
|
|
|
|
|
|
self._session_db.append_message(
|
|
|
|
|
|
session_id=self.session_id,
|
|
|
|
|
|
role=role,
|
|
|
|
|
|
content=content,
|
|
|
|
|
|
tool_name=msg.get("tool_name"),
|
|
|
|
|
|
tool_calls=tool_calls_data,
|
|
|
|
|
|
tool_call_id=msg.get("tool_call_id"),
|
|
|
|
|
|
finish_reason=msg.get("finish_reason"),
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.debug("Session DB log_msg failed: %s", e)
|
|
|
|
|
|
|
2026-02-21 00:05:39 -08:00
|
|
|
|
def _flush_messages_to_session_db(self, messages: List[Dict], conversation_history: List[Dict] = None):
|
|
|
|
|
|
"""Persist any un-logged messages to the SQLite session store.
|
|
|
|
|
|
|
|
|
|
|
|
Called both at the normal end of run_conversation and from every early-
|
|
|
|
|
|
return path so that tool calls, tool responses, and assistant messages
|
|
|
|
|
|
are never lost even when the conversation errors out.
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not self._session_db:
|
|
|
|
|
|
return
|
|
|
|
|
|
try:
|
2026-02-27 03:21:49 -08:00
|
|
|
|
start_idx = len(conversation_history) if conversation_history else 0
|
2026-02-21 00:05:39 -08:00
|
|
|
|
for msg in messages[start_idx:]:
|
|
|
|
|
|
role = msg.get("role", "unknown")
|
|
|
|
|
|
content = msg.get("content")
|
|
|
|
|
|
tool_calls_data = None
|
|
|
|
|
|
if hasattr(msg, "tool_calls") and msg.tool_calls:
|
|
|
|
|
|
tool_calls_data = [
|
|
|
|
|
|
{"name": tc.function.name, "arguments": tc.function.arguments}
|
|
|
|
|
|
for tc in msg.tool_calls
|
|
|
|
|
|
]
|
|
|
|
|
|
elif isinstance(msg.get("tool_calls"), list):
|
|
|
|
|
|
tool_calls_data = msg["tool_calls"]
|
|
|
|
|
|
self._session_db.append_message(
|
|
|
|
|
|
session_id=self.session_id,
|
|
|
|
|
|
role=role,
|
|
|
|
|
|
content=content,
|
|
|
|
|
|
tool_name=msg.get("tool_name"),
|
|
|
|
|
|
tool_calls=tool_calls_data,
|
|
|
|
|
|
tool_call_id=msg.get("tool_call_id"),
|
|
|
|
|
|
finish_reason=msg.get("finish_reason"),
|
|
|
|
|
|
)
|
2026-02-21 03:32:11 -08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.debug("Session DB append_message failed: %s", e)
|
2026-02-21 00:05:39 -08:00
|
|
|
|
|
2026-01-10 13:04:43 +00:00
|
|
|
|
def _get_messages_up_to_last_assistant(self, messages: List[Dict]) -> List[Dict]:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Get messages up to (but not including) the last assistant turn.
|
|
|
|
|
|
|
|
|
|
|
|
This is used when we need to "roll back" to the last successful point
|
|
|
|
|
|
in the conversation, typically when the final assistant message is
|
|
|
|
|
|
incomplete or malformed.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
messages: Full message list
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
Messages up to the last complete assistant turn (ending with user/tool message)
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not messages:
|
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
|
|
# Find the index of the last assistant message
|
|
|
|
|
|
last_assistant_idx = None
|
|
|
|
|
|
for i in range(len(messages) - 1, -1, -1):
|
|
|
|
|
|
if messages[i].get("role") == "assistant":
|
|
|
|
|
|
last_assistant_idx = i
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
if last_assistant_idx is None:
|
|
|
|
|
|
# No assistant message found, return all messages
|
|
|
|
|
|
return messages.copy()
|
|
|
|
|
|
|
|
|
|
|
|
# Return everything up to (not including) the last assistant message
|
|
|
|
|
|
return messages[:last_assistant_idx]
|
|
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
def _format_tools_for_system_message(self) -> str:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Format tool definitions for the system message in the trajectory format.
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
str: JSON string representation of tool definitions
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not self.tools:
|
|
|
|
|
|
return "[]"
|
|
|
|
|
|
|
|
|
|
|
|
# Convert tool definitions to the format expected in trajectories
|
|
|
|
|
|
formatted_tools = []
|
|
|
|
|
|
for tool in self.tools:
|
|
|
|
|
|
func = tool["function"]
|
|
|
|
|
|
formatted_tool = {
|
|
|
|
|
|
"name": func["name"],
|
|
|
|
|
|
"description": func.get("description", ""),
|
|
|
|
|
|
"parameters": func.get("parameters", {}),
|
|
|
|
|
|
"required": None # Match the format in the example
|
|
|
|
|
|
}
|
|
|
|
|
|
formatted_tools.append(formatted_tool)
|
|
|
|
|
|
|
2025-11-05 03:47:17 +00:00
|
|
|
|
return json.dumps(formatted_tools, ensure_ascii=False)
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
|
|
|
|
|
def _convert_to_trajectory_format(self, messages: List[Dict[str, Any]], user_query: str, completed: bool) -> List[Dict[str, Any]]:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Convert internal message format to trajectory format for saving.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
messages (List[Dict]): Internal message history
|
|
|
|
|
|
user_query (str): Original user query
|
|
|
|
|
|
completed (bool): Whether the conversation completed successfully
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
List[Dict]: Messages in trajectory format
|
|
|
|
|
|
"""
|
|
|
|
|
|
trajectory = []
|
|
|
|
|
|
|
|
|
|
|
|
# Add system message with tool definitions
|
|
|
|
|
|
system_msg = (
|
|
|
|
|
|
"You are a function calling AI model. You are provided with function signatures within <tools> </tools> XML tags. "
|
|
|
|
|
|
"You may call one or more functions to assist with the user query. If available tools are not relevant in assisting "
|
|
|
|
|
|
"with user query, just respond in natural conversational language. Don't make assumptions about what values to plug "
|
|
|
|
|
|
"into functions. After calling & executing the functions, you will be provided with function results within "
|
|
|
|
|
|
"<tool_response> </tool_response> XML tags. Here are the available tools:\n"
|
|
|
|
|
|
f"<tools>\n{self._format_tools_for_system_message()}\n</tools>\n"
|
|
|
|
|
|
"For each function call return a JSON object, with the following pydantic model json schema for each:\n"
|
|
|
|
|
|
"{'title': 'FunctionCall', 'type': 'object', 'properties': {'name': {'title': 'Name', 'type': 'string'}, "
|
|
|
|
|
|
"'arguments': {'title': 'Arguments', 'type': 'object'}}, 'required': ['name', 'arguments']}\n"
|
|
|
|
|
|
"Each function call should be enclosed within <tool_call> </tool_call> XML tags.\n"
|
|
|
|
|
|
"Example:\n<tool_call>\n{'name': <function-name>,'arguments': <args-dict>}\n</tool_call>"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
trajectory.append({
|
|
|
|
|
|
"from": "system",
|
|
|
|
|
|
"value": system_msg
|
|
|
|
|
|
})
|
|
|
|
|
|
|
2026-02-08 10:49:24 +00:00
|
|
|
|
# Add the actual user prompt (from the dataset) as the first human message
|
2025-09-10 00:43:55 -07:00
|
|
|
|
trajectory.append({
|
|
|
|
|
|
"from": "human",
|
|
|
|
|
|
"value": user_query
|
|
|
|
|
|
})
|
|
|
|
|
|
|
2026-02-23 23:55:42 -08:00
|
|
|
|
# Skip the first message (the user query) since we already added it above.
|
|
|
|
|
|
# Prefill messages are injected at API-call time only (not in the messages
|
|
|
|
|
|
# list), so no offset adjustment is needed here.
|
|
|
|
|
|
i = 1
|
2026-02-08 10:49:24 +00:00
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
while i < len(messages):
|
|
|
|
|
|
msg = messages[i]
|
|
|
|
|
|
|
|
|
|
|
|
if msg["role"] == "assistant":
|
|
|
|
|
|
# Check if this message has tool calls
|
|
|
|
|
|
if "tool_calls" in msg and msg["tool_calls"]:
|
|
|
|
|
|
# Format assistant message with tool calls
|
2026-01-29 06:10:24 +00:00
|
|
|
|
# Add <think> tags around reasoning for trajectory storage
|
2025-09-10 00:43:55 -07:00
|
|
|
|
content = ""
|
2026-01-08 08:57:51 +00:00
|
|
|
|
|
2026-02-08 10:49:24 +00:00
|
|
|
|
# Prepend reasoning in <think> tags if available (native thinking tokens)
|
2026-01-08 08:57:51 +00:00
|
|
|
|
if msg.get("reasoning") and msg["reasoning"].strip():
|
2026-01-29 06:10:24 +00:00
|
|
|
|
content = f"<think>\n{msg['reasoning']}\n</think>\n"
|
2026-01-08 08:57:51 +00:00
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
if msg.get("content") and msg["content"].strip():
|
2026-02-08 10:49:24 +00:00
|
|
|
|
# Convert any <REASONING_SCRATCHPAD> tags to <think> tags
|
|
|
|
|
|
# (used when native thinking is disabled and model reasons via XML)
|
2026-02-23 09:55:09 -08:00
|
|
|
|
content += convert_scratchpad_to_think(msg["content"]) + "\n"
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
|
|
|
|
|
# Add tool calls wrapped in XML tags
|
|
|
|
|
|
for tool_call in msg["tool_calls"]:
|
2026-01-10 13:04:43 +00:00
|
|
|
|
# Parse arguments - should always succeed since we validate during conversation
|
|
|
|
|
|
# but keep try-except as safety net
|
|
|
|
|
|
try:
|
|
|
|
|
|
arguments = json.loads(tool_call["function"]["arguments"]) if isinstance(tool_call["function"]["arguments"], str) else tool_call["function"]["arguments"]
|
|
|
|
|
|
except json.JSONDecodeError:
|
|
|
|
|
|
# This shouldn't happen since we validate and retry during conversation,
|
|
|
|
|
|
# but if it does, log warning and use empty dict
|
|
|
|
|
|
logging.warning(f"Unexpected invalid JSON in trajectory conversion: {tool_call['function']['arguments'][:100]}")
|
|
|
|
|
|
arguments = {}
|
|
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
tool_call_json = {
|
|
|
|
|
|
"name": tool_call["function"]["name"],
|
2026-01-10 13:04:43 +00:00
|
|
|
|
"arguments": arguments
|
2025-09-10 00:43:55 -07:00
|
|
|
|
}
|
2025-11-05 03:47:17 +00:00
|
|
|
|
content += f"<tool_call>\n{json.dumps(tool_call_json, ensure_ascii=False)}\n</tool_call>\n"
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
2026-02-08 20:19:14 +00:00
|
|
|
|
# Ensure every gpt turn has a <think> block (empty if no reasoning)
|
|
|
|
|
|
# so the format is consistent for training data
|
|
|
|
|
|
if "<think>" not in content:
|
|
|
|
|
|
content = "<think>\n</think>\n" + content
|
|
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
trajectory.append({
|
|
|
|
|
|
"from": "gpt",
|
|
|
|
|
|
"value": content.rstrip()
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
# Collect all subsequent tool responses
|
|
|
|
|
|
tool_responses = []
|
|
|
|
|
|
j = i + 1
|
|
|
|
|
|
while j < len(messages) and messages[j]["role"] == "tool":
|
|
|
|
|
|
tool_msg = messages[j]
|
|
|
|
|
|
# Format tool response with XML tags
|
|
|
|
|
|
tool_response = f"<tool_response>\n"
|
|
|
|
|
|
|
|
|
|
|
|
# Try to parse tool content as JSON if it looks like JSON
|
|
|
|
|
|
tool_content = tool_msg["content"]
|
|
|
|
|
|
try:
|
|
|
|
|
|
if tool_content.strip().startswith(("{", "[")):
|
|
|
|
|
|
tool_content = json.loads(tool_content)
|
|
|
|
|
|
except (json.JSONDecodeError, AttributeError):
|
|
|
|
|
|
pass # Keep as string if not valid JSON
|
|
|
|
|
|
|
|
|
|
|
|
tool_response += json.dumps({
|
|
|
|
|
|
"tool_call_id": tool_msg.get("tool_call_id", ""),
|
|
|
|
|
|
"name": msg["tool_calls"][len(tool_responses)]["function"]["name"] if len(tool_responses) < len(msg["tool_calls"]) else "unknown",
|
|
|
|
|
|
"content": tool_content
|
2025-11-05 03:47:17 +00:00
|
|
|
|
}, ensure_ascii=False)
|
2025-09-10 00:43:55 -07:00
|
|
|
|
tool_response += "\n</tool_response>"
|
|
|
|
|
|
tool_responses.append(tool_response)
|
|
|
|
|
|
j += 1
|
|
|
|
|
|
|
|
|
|
|
|
# Add all tool responses as a single message
|
|
|
|
|
|
if tool_responses:
|
|
|
|
|
|
trajectory.append({
|
|
|
|
|
|
"from": "tool",
|
|
|
|
|
|
"value": "\n".join(tool_responses)
|
|
|
|
|
|
})
|
|
|
|
|
|
i = j - 1 # Skip the tool messages we just processed
|
|
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Regular assistant message without tool calls
|
2026-01-29 06:10:24 +00:00
|
|
|
|
# Add <think> tags around reasoning for trajectory storage
|
2026-01-08 08:57:51 +00:00
|
|
|
|
content = ""
|
|
|
|
|
|
|
2026-02-08 10:49:24 +00:00
|
|
|
|
# Prepend reasoning in <think> tags if available (native thinking tokens)
|
2026-01-08 08:57:51 +00:00
|
|
|
|
if msg.get("reasoning") and msg["reasoning"].strip():
|
2026-01-29 06:10:24 +00:00
|
|
|
|
content = f"<think>\n{msg['reasoning']}\n</think>\n"
|
2026-01-08 08:57:51 +00:00
|
|
|
|
|
2026-02-08 10:49:24 +00:00
|
|
|
|
# Convert any <REASONING_SCRATCHPAD> tags to <think> tags
|
|
|
|
|
|
# (used when native thinking is disabled and model reasons via XML)
|
|
|
|
|
|
raw_content = msg["content"] or ""
|
2026-02-23 09:55:09 -08:00
|
|
|
|
content += convert_scratchpad_to_think(raw_content)
|
2026-01-08 08:57:51 +00:00
|
|
|
|
|
2026-02-08 20:19:14 +00:00
|
|
|
|
# Ensure every gpt turn has a <think> block (empty if no reasoning)
|
|
|
|
|
|
if "<think>" not in content:
|
|
|
|
|
|
content = "<think>\n</think>\n" + content
|
|
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
trajectory.append({
|
|
|
|
|
|
"from": "gpt",
|
2026-01-29 06:10:24 +00:00
|
|
|
|
"value": content.strip()
|
2025-09-10 00:43:55 -07:00
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
elif msg["role"] == "user":
|
|
|
|
|
|
trajectory.append({
|
|
|
|
|
|
"from": "human",
|
|
|
|
|
|
"value": msg["content"]
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
i += 1
|
|
|
|
|
|
|
|
|
|
|
|
return trajectory
|
|
|
|
|
|
|
|
|
|
|
|
def _save_trajectory(self, messages: List[Dict[str, Any]], user_query: str, completed: bool):
|
|
|
|
|
|
"""
|
|
|
|
|
|
Save conversation trajectory to JSONL file.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
messages (List[Dict]): Complete message history
|
|
|
|
|
|
user_query (str): Original user query
|
|
|
|
|
|
completed (bool): Whether the conversation completed successfully
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not self.save_trajectories:
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
trajectory = self._convert_to_trajectory_format(messages, user_query, completed)
|
2026-02-23 09:55:09 -08:00
|
|
|
|
_save_trajectory_to_file(trajectory, self.model, completed)
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
2026-02-20 17:24:00 -08:00
|
|
|
|
def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]:
|
|
|
|
|
|
if not key:
|
|
|
|
|
|
return None
|
|
|
|
|
|
if len(key) <= 12:
|
|
|
|
|
|
return "***"
|
|
|
|
|
|
return f"{key[:8]}...{key[-4:]}"
|
|
|
|
|
|
|
|
|
|
|
|
def _dump_api_request_debug(
|
|
|
|
|
|
self,
|
|
|
|
|
|
api_kwargs: Dict[str, Any],
|
|
|
|
|
|
*,
|
|
|
|
|
|
reason: str,
|
|
|
|
|
|
error: Optional[Exception] = None,
|
|
|
|
|
|
) -> Optional[Path]:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Dump a debug-friendly HTTP request record for chat.completions.create().
|
|
|
|
|
|
|
|
|
|
|
|
Captures the request body from api_kwargs (excluding transport-only keys
|
|
|
|
|
|
like timeout). Intended for debugging provider-side 4xx failures where
|
|
|
|
|
|
retries are not useful.
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
body = copy.deepcopy(api_kwargs)
|
|
|
|
|
|
body.pop("timeout", None)
|
|
|
|
|
|
body = {k: v for k, v in body.items() if v is not None}
|
|
|
|
|
|
|
|
|
|
|
|
api_key = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
api_key = getattr(self.client, "api_key", None)
|
2026-02-21 03:32:11 -08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.debug("Could not extract API key for debug dump: %s", e)
|
2026-02-20 17:24:00 -08:00
|
|
|
|
|
|
|
|
|
|
dump_payload: Dict[str, Any] = {
|
|
|
|
|
|
"timestamp": datetime.now().isoformat(),
|
|
|
|
|
|
"session_id": self.session_id,
|
|
|
|
|
|
"reason": reason,
|
|
|
|
|
|
"request": {
|
|
|
|
|
|
"method": "POST",
|
|
|
|
|
|
"url": f"{self.base_url.rstrip('/')}/chat/completions",
|
|
|
|
|
|
"headers": {
|
|
|
|
|
|
"Authorization": f"Bearer {self._mask_api_key_for_logs(api_key)}",
|
|
|
|
|
|
"Content-Type": "application/json",
|
|
|
|
|
|
},
|
|
|
|
|
|
"body": body,
|
|
|
|
|
|
},
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if error is not None:
|
|
|
|
|
|
error_info: Dict[str, Any] = {
|
|
|
|
|
|
"type": type(error).__name__,
|
|
|
|
|
|
"message": str(error),
|
|
|
|
|
|
}
|
|
|
|
|
|
for attr_name in ("status_code", "request_id", "code", "param", "type"):
|
|
|
|
|
|
attr_value = getattr(error, attr_name, None)
|
|
|
|
|
|
if attr_value is not None:
|
|
|
|
|
|
error_info[attr_name] = attr_value
|
|
|
|
|
|
|
|
|
|
|
|
body_attr = getattr(error, "body", None)
|
|
|
|
|
|
if body_attr is not None:
|
|
|
|
|
|
error_info["body"] = body_attr
|
|
|
|
|
|
|
|
|
|
|
|
response_obj = getattr(error, "response", None)
|
|
|
|
|
|
if response_obj is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
error_info["response_status"] = getattr(response_obj, "status_code", None)
|
|
|
|
|
|
error_info["response_text"] = response_obj.text
|
2026-02-21 03:32:11 -08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.debug("Could not extract error response details: %s", e)
|
2026-02-20 17:24:00 -08:00
|
|
|
|
|
|
|
|
|
|
dump_payload["error"] = error_info
|
|
|
|
|
|
|
|
|
|
|
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
|
|
|
|
|
|
dump_file = self.logs_dir / f"request_dump_{self.session_id}_{timestamp}.json"
|
|
|
|
|
|
dump_file.write_text(
|
|
|
|
|
|
json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str),
|
|
|
|
|
|
encoding="utf-8",
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
print(f"{self.log_prefix}🧾 Request debug dump written to: {dump_file}")
|
|
|
|
|
|
|
|
|
|
|
|
if os.getenv("HERMES_DUMP_REQUEST_STDOUT", "").strip().lower() in {"1", "true", "yes", "on"}:
|
|
|
|
|
|
print(json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str))
|
|
|
|
|
|
|
|
|
|
|
|
return dump_file
|
|
|
|
|
|
except Exception as dump_error:
|
|
|
|
|
|
if self.verbose_logging:
|
|
|
|
|
|
logging.warning(f"Failed to dump API request debug payload: {dump_error}")
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
2026-02-25 04:22:03 -08:00
|
|
|
|
@staticmethod
|
|
|
|
|
|
def _clean_session_content(content: str) -> str:
|
|
|
|
|
|
"""Convert REASONING_SCRATCHPAD to think tags and clean up whitespace."""
|
|
|
|
|
|
if not content:
|
|
|
|
|
|
return content
|
|
|
|
|
|
content = convert_scratchpad_to_think(content)
|
|
|
|
|
|
content = re.sub(r'\n+(<think>)', r'\n\1', content)
|
|
|
|
|
|
content = re.sub(r'(</think>)\n+', r'\1\n', content)
|
|
|
|
|
|
return content.strip()
|
|
|
|
|
|
|
2026-02-01 15:36:26 -08:00
|
|
|
|
def _save_session_log(self, messages: List[Dict[str, Any]] = None):
|
|
|
|
|
|
"""
|
2026-02-21 01:26:37 -08:00
|
|
|
|
Save the full raw session to a JSON file.
|
|
|
|
|
|
|
|
|
|
|
|
Stores every message exactly as the agent sees it: user messages,
|
|
|
|
|
|
assistant messages (with reasoning, finish_reason, tool_calls),
|
|
|
|
|
|
tool responses (with tool_call_id, tool_name), and injected system
|
|
|
|
|
|
messages (compression summaries, todo snapshots, etc.).
|
|
|
|
|
|
|
2026-02-25 04:22:03 -08:00
|
|
|
|
REASONING_SCRATCHPAD tags are converted to <think> blocks for consistency.
|
2026-02-21 01:26:37 -08:00
|
|
|
|
Overwritten after each turn so it always reflects the latest state.
|
2026-02-01 15:36:26 -08:00
|
|
|
|
"""
|
|
|
|
|
|
messages = messages or self._session_messages
|
|
|
|
|
|
if not messages:
|
|
|
|
|
|
return
|
2026-02-21 01:26:37 -08:00
|
|
|
|
|
2026-02-01 15:36:26 -08:00
|
|
|
|
try:
|
2026-02-25 04:22:03 -08:00
|
|
|
|
# Clean assistant content for session logs
|
|
|
|
|
|
cleaned = []
|
|
|
|
|
|
for msg in messages:
|
|
|
|
|
|
if msg.get("role") == "assistant" and msg.get("content"):
|
|
|
|
|
|
msg = dict(msg)
|
|
|
|
|
|
msg["content"] = self._clean_session_content(msg["content"])
|
|
|
|
|
|
cleaned.append(msg)
|
|
|
|
|
|
|
2026-02-01 15:36:26 -08:00
|
|
|
|
entry = {
|
|
|
|
|
|
"session_id": self.session_id,
|
|
|
|
|
|
"model": self.model,
|
2026-02-21 01:26:37 -08:00
|
|
|
|
"base_url": self.base_url,
|
|
|
|
|
|
"platform": self.platform,
|
2026-02-01 15:36:26 -08:00
|
|
|
|
"session_start": self.session_start.isoformat(),
|
|
|
|
|
|
"last_updated": datetime.now().isoformat(),
|
2026-03-01 23:50:54 -08:00
|
|
|
|
"system_prompt": self._cached_system_prompt or "",
|
2026-03-02 00:13:41 -08:00
|
|
|
|
"tools": self.tools or [],
|
2026-02-25 04:22:03 -08:00
|
|
|
|
"message_count": len(cleaned),
|
|
|
|
|
|
"messages": cleaned,
|
2026-02-01 15:36:26 -08:00
|
|
|
|
}
|
2026-02-21 01:26:37 -08:00
|
|
|
|
|
2026-02-01 15:36:26 -08:00
|
|
|
|
with open(self.session_log_file, "w", encoding="utf-8") as f:
|
2026-02-21 01:26:37 -08:00
|
|
|
|
json.dump(entry, f, indent=2, ensure_ascii=False, default=str)
|
|
|
|
|
|
|
2026-02-01 15:36:26 -08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
if self.verbose_logging:
|
|
|
|
|
|
logging.warning(f"Failed to save session log: {e}")
|
|
|
|
|
|
|
2026-02-03 16:15:49 -08:00
|
|
|
|
def interrupt(self, message: str = None) -> None:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Request the agent to interrupt its current tool-calling loop.
|
|
|
|
|
|
|
|
|
|
|
|
Call this from another thread (e.g., input handler, message receiver)
|
|
|
|
|
|
to gracefully stop the agent and process a new message.
|
|
|
|
|
|
|
2026-02-10 16:34:27 -08:00
|
|
|
|
Also signals long-running tool executions (e.g. terminal commands)
|
|
|
|
|
|
to terminate early, so the agent can respond immediately.
|
|
|
|
|
|
|
2026-02-03 16:15:49 -08:00
|
|
|
|
Args:
|
|
|
|
|
|
message: Optional new message that triggered the interrupt.
|
|
|
|
|
|
If provided, the agent will include this in its response context.
|
|
|
|
|
|
|
|
|
|
|
|
Example (CLI):
|
|
|
|
|
|
# In a separate input thread:
|
|
|
|
|
|
if user_typed_something:
|
|
|
|
|
|
agent.interrupt(user_input)
|
|
|
|
|
|
|
|
|
|
|
|
Example (Messaging):
|
|
|
|
|
|
# When new message arrives for active session:
|
|
|
|
|
|
if session_has_running_agent:
|
|
|
|
|
|
running_agent.interrupt(new_message.text)
|
|
|
|
|
|
"""
|
|
|
|
|
|
self._interrupt_requested = True
|
|
|
|
|
|
self._interrupt_message = message
|
2026-02-23 02:11:33 -08:00
|
|
|
|
# Signal all tools to abort any in-flight operations immediately
|
|
|
|
|
|
_set_interrupt(True)
|
2026-02-20 03:15:53 -08:00
|
|
|
|
# Propagate interrupt to any running child agents (subagent delegation)
|
|
|
|
|
|
for child in self._active_children:
|
|
|
|
|
|
try:
|
|
|
|
|
|
child.interrupt(message)
|
2026-02-21 03:32:11 -08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.debug("Failed to propagate interrupt to child agent: %s", e)
|
2026-02-03 16:15:49 -08:00
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
print(f"\n⚡ Interrupt requested" + (f": '{message[:40]}...'" if message and len(message) > 40 else f": '{message}'" if message else ""))
|
|
|
|
|
|
|
|
|
|
|
|
def clear_interrupt(self) -> None:
|
2026-02-25 03:45:47 -08:00
|
|
|
|
"""Clear any pending interrupt request and the global tool interrupt signal."""
|
2026-02-03 16:15:49 -08:00
|
|
|
|
self._interrupt_requested = False
|
|
|
|
|
|
self._interrupt_message = None
|
2026-02-25 03:45:47 -08:00
|
|
|
|
_set_interrupt(False)
|
2026-02-17 17:02:33 -08:00
|
|
|
|
|
|
|
|
|
|
def _hydrate_todo_store(self, history: List[Dict[str, Any]]) -> None:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Recover todo state from conversation history.
|
|
|
|
|
|
|
|
|
|
|
|
The gateway creates a fresh AIAgent per message, so the in-memory
|
|
|
|
|
|
TodoStore is empty. We scan the history for the most recent todo
|
|
|
|
|
|
tool response and replay it to reconstruct the state.
|
|
|
|
|
|
"""
|
|
|
|
|
|
# Walk history backwards to find the most recent todo tool response
|
|
|
|
|
|
last_todo_response = None
|
|
|
|
|
|
for msg in reversed(history):
|
|
|
|
|
|
if msg.get("role") != "tool":
|
|
|
|
|
|
continue
|
|
|
|
|
|
content = msg.get("content", "")
|
|
|
|
|
|
# Quick check: todo responses contain "todos" key
|
|
|
|
|
|
if '"todos"' not in content:
|
|
|
|
|
|
continue
|
|
|
|
|
|
try:
|
|
|
|
|
|
data = json.loads(content)
|
|
|
|
|
|
if "todos" in data and isinstance(data["todos"], list):
|
|
|
|
|
|
last_todo_response = data["todos"]
|
|
|
|
|
|
break
|
|
|
|
|
|
except (json.JSONDecodeError, TypeError):
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
if last_todo_response:
|
|
|
|
|
|
# Replay the items into the store (replace mode)
|
|
|
|
|
|
self._todo_store.write(last_todo_response, merge=False)
|
|
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
print(f"{self.log_prefix}📋 Restored {len(last_todo_response)} todo item(s) from history")
|
2026-02-23 02:11:33 -08:00
|
|
|
|
_set_interrupt(False)
|
2026-02-03 16:15:49 -08:00
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
|
|
def is_interrupted(self) -> bool:
|
|
|
|
|
|
"""Check if an interrupt has been requested."""
|
|
|
|
|
|
return self._interrupt_requested
|
2026-02-25 19:34:25 -05:00
|
|
|
|
|
|
|
|
|
|
# ── Honcho integration helpers ──
|
|
|
|
|
|
|
|
|
|
|
|
def _honcho_prefetch(self, user_message: str) -> str:
|
|
|
|
|
|
"""Fetch user context from Honcho for system prompt injection.
|
|
|
|
|
|
|
|
|
|
|
|
Returns a formatted context block, or empty string if unavailable.
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not self._honcho or not self._honcho_session_key:
|
|
|
|
|
|
return ""
|
|
|
|
|
|
try:
|
|
|
|
|
|
ctx = self._honcho.get_prefetch_context(self._honcho_session_key, user_message)
|
|
|
|
|
|
if not ctx:
|
|
|
|
|
|
return ""
|
|
|
|
|
|
parts = []
|
|
|
|
|
|
rep = ctx.get("representation", "")
|
|
|
|
|
|
card = ctx.get("card", "")
|
|
|
|
|
|
if rep:
|
|
|
|
|
|
parts.append(rep)
|
|
|
|
|
|
if card:
|
|
|
|
|
|
parts.append(card)
|
|
|
|
|
|
if not parts:
|
|
|
|
|
|
return ""
|
|
|
|
|
|
return "# Honcho User Context\n" + "\n\n".join(parts)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.debug("Honcho prefetch failed (non-fatal): %s", e)
|
|
|
|
|
|
return ""
|
|
|
|
|
|
|
2026-02-26 18:02:24 -05:00
|
|
|
|
def _honcho_save_user_observation(self, content: str) -> str:
|
|
|
|
|
|
"""Route a memory tool target=user add to Honcho.
|
|
|
|
|
|
|
|
|
|
|
|
Sends the content as a user peer message so Honcho's reasoning
|
|
|
|
|
|
model can incorporate it into the user representation.
|
|
|
|
|
|
"""
|
|
|
|
|
|
if not content or not content.strip():
|
|
|
|
|
|
return json.dumps({"success": False, "error": "Content cannot be empty."})
|
|
|
|
|
|
try:
|
|
|
|
|
|
session = self._honcho.get_or_create(self._honcho_session_key)
|
|
|
|
|
|
session.add_message("user", f"[observation] {content.strip()}")
|
|
|
|
|
|
self._honcho.save(session)
|
|
|
|
|
|
return json.dumps({
|
|
|
|
|
|
"success": True,
|
|
|
|
|
|
"target": "user",
|
|
|
|
|
|
"message": "Saved to Honcho user model.",
|
|
|
|
|
|
})
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.debug("Honcho user observation failed: %s", e)
|
|
|
|
|
|
return json.dumps({"success": False, "error": f"Honcho save failed: {e}"})
|
|
|
|
|
|
|
2026-02-25 19:34:25 -05:00
|
|
|
|
def _honcho_sync(self, user_content: str, assistant_content: str) -> None:
|
|
|
|
|
|
"""Sync the user/assistant message pair to Honcho."""
|
|
|
|
|
|
if not self._honcho or not self._honcho_session_key:
|
|
|
|
|
|
return
|
|
|
|
|
|
try:
|
|
|
|
|
|
session = self._honcho.get_or_create(self._honcho_session_key)
|
|
|
|
|
|
session.add_message("user", user_content)
|
|
|
|
|
|
session.add_message("assistant", assistant_content)
|
|
|
|
|
|
self._honcho.save(session)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.debug("Honcho sync failed (non-fatal): %s", e)
|
|
|
|
|
|
|
2026-02-19 00:57:31 -08:00
|
|
|
|
def _build_system_prompt(self, system_message: str = None) -> str:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Assemble the full system prompt from all layers.
|
|
|
|
|
|
|
|
|
|
|
|
Called once per session (cached on self._cached_system_prompt) and only
|
|
|
|
|
|
rebuilt after context compression events. This ensures the system prompt
|
|
|
|
|
|
is stable across all turns in a session, maximizing prefix cache hits.
|
|
|
|
|
|
"""
|
|
|
|
|
|
# Layers (in order):
|
|
|
|
|
|
# 1. Default agent identity (always present)
|
|
|
|
|
|
# 2. User / gateway system prompt (if provided)
|
|
|
|
|
|
# 3. Persistent memory (frozen snapshot)
|
|
|
|
|
|
# 4. Skills guidance (if skills tools are loaded)
|
|
|
|
|
|
# 5. Context files (SOUL.md, AGENTS.md, .cursorrules)
|
|
|
|
|
|
# 6. Current date & time (frozen at build time)
|
|
|
|
|
|
# 7. Platform-specific formatting hint
|
|
|
|
|
|
prompt_parts = [DEFAULT_AGENT_IDENTITY]
|
|
|
|
|
|
|
2026-02-22 02:31:52 -08:00
|
|
|
|
# Tool-aware behavioral guidance: only inject when the tools are loaded
|
|
|
|
|
|
tool_guidance = []
|
|
|
|
|
|
if "memory" in self.valid_tool_names:
|
|
|
|
|
|
tool_guidance.append(MEMORY_GUIDANCE)
|
|
|
|
|
|
if "session_search" in self.valid_tool_names:
|
|
|
|
|
|
tool_guidance.append(SESSION_SEARCH_GUIDANCE)
|
2026-02-22 13:28:13 -08:00
|
|
|
|
if "skill_manage" in self.valid_tool_names:
|
|
|
|
|
|
tool_guidance.append(SKILLS_GUIDANCE)
|
2026-02-22 02:31:52 -08:00
|
|
|
|
if tool_guidance:
|
|
|
|
|
|
prompt_parts.append(" ".join(tool_guidance))
|
|
|
|
|
|
|
2026-02-23 23:55:42 -08:00
|
|
|
|
# Note: ephemeral_system_prompt is NOT included here. It's injected at
|
|
|
|
|
|
# API-call time only so it stays out of the cached/stored system prompt.
|
|
|
|
|
|
if system_message is not None:
|
|
|
|
|
|
prompt_parts.append(system_message)
|
2026-02-19 00:57:31 -08:00
|
|
|
|
|
|
|
|
|
|
if self._memory_store:
|
|
|
|
|
|
if self._memory_enabled:
|
|
|
|
|
|
mem_block = self._memory_store.format_for_system_prompt("memory")
|
|
|
|
|
|
if mem_block:
|
|
|
|
|
|
prompt_parts.append(mem_block)
|
2026-02-26 18:07:33 -05:00
|
|
|
|
# USER.md is always included when enabled -- Honcho prefetch is additive.
|
2026-02-19 00:57:31 -08:00
|
|
|
|
if self._user_profile_enabled:
|
|
|
|
|
|
user_block = self._memory_store.format_for_system_prompt("user")
|
|
|
|
|
|
if user_block:
|
|
|
|
|
|
prompt_parts.append(user_block)
|
|
|
|
|
|
|
2026-02-19 18:25:53 -08:00
|
|
|
|
has_skills_tools = any(name in self.valid_tool_names for name in ['skills_list', 'skill_view', 'skill_manage'])
|
2026-02-19 00:57:31 -08:00
|
|
|
|
skills_prompt = build_skills_system_prompt() if has_skills_tools else ""
|
|
|
|
|
|
if skills_prompt:
|
|
|
|
|
|
prompt_parts.append(skills_prompt)
|
|
|
|
|
|
|
|
|
|
|
|
if not self.skip_context_files:
|
|
|
|
|
|
context_files_prompt = build_context_files_prompt()
|
|
|
|
|
|
if context_files_prompt:
|
|
|
|
|
|
prompt_parts.append(context_files_prompt)
|
|
|
|
|
|
|
2026-03-03 11:57:18 +05:30
|
|
|
|
from hermes_time import now as _hermes_now
|
|
|
|
|
|
now = _hermes_now()
|
2026-02-19 00:57:31 -08:00
|
|
|
|
prompt_parts.append(
|
|
|
|
|
|
f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
platform_key = (self.platform or "").lower().strip()
|
|
|
|
|
|
if platform_key in PLATFORM_HINTS:
|
|
|
|
|
|
prompt_parts.append(PLATFORM_HINTS[platform_key])
|
|
|
|
|
|
|
|
|
|
|
|
return "\n\n".join(prompt_parts)
|
|
|
|
|
|
|
|
|
|
|
|
def _invalidate_system_prompt(self):
|
|
|
|
|
|
"""
|
|
|
|
|
|
Invalidate the cached system prompt, forcing a rebuild on the next turn.
|
|
|
|
|
|
|
|
|
|
|
|
Called after context compression events. Also reloads memory from disk
|
|
|
|
|
|
so the rebuilt prompt captures any writes from this session.
|
|
|
|
|
|
"""
|
|
|
|
|
|
self._cached_system_prompt = None
|
|
|
|
|
|
if self._memory_store:
|
|
|
|
|
|
self._memory_store.load_from_disk()
|
2026-02-21 04:17:27 -08:00
|
|
|
|
|
2026-02-25 18:20:38 -08:00
|
|
|
|
def _responses_tools(self, tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
|
|
|
|
|
|
"""Convert chat-completions tool schemas to Responses function-tool schemas."""
|
|
|
|
|
|
source_tools = tools if tools is not None else self.tools
|
|
|
|
|
|
if not source_tools:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
converted: List[Dict[str, Any]] = []
|
|
|
|
|
|
for item in source_tools:
|
|
|
|
|
|
fn = item.get("function", {}) if isinstance(item, dict) else {}
|
|
|
|
|
|
name = fn.get("name")
|
|
|
|
|
|
if not isinstance(name, str) or not name.strip():
|
|
|
|
|
|
continue
|
|
|
|
|
|
converted.append({
|
|
|
|
|
|
"type": "function",
|
|
|
|
|
|
"name": name,
|
|
|
|
|
|
"description": fn.get("description", ""),
|
2026-02-25 18:11:26 -08:00
|
|
|
|
"strict": False,
|
2026-02-25 18:20:38 -08:00
|
|
|
|
"parameters": fn.get("parameters", {"type": "object", "properties": {}}),
|
|
|
|
|
|
})
|
|
|
|
|
|
return converted or None
|
|
|
|
|
|
|
2026-02-25 18:11:26 -08:00
|
|
|
|
@staticmethod
|
|
|
|
|
|
def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]:
|
|
|
|
|
|
"""Split a stored tool id into (call_id, response_item_id)."""
|
|
|
|
|
|
if not isinstance(raw_id, str):
|
|
|
|
|
|
return None, None
|
|
|
|
|
|
value = raw_id.strip()
|
|
|
|
|
|
if not value:
|
|
|
|
|
|
return None, None
|
|
|
|
|
|
if "|" in value:
|
|
|
|
|
|
call_id, response_item_id = value.split("|", 1)
|
|
|
|
|
|
call_id = call_id.strip() or None
|
|
|
|
|
|
response_item_id = response_item_id.strip() or None
|
|
|
|
|
|
return call_id, response_item_id
|
|
|
|
|
|
if value.startswith("fc_"):
|
|
|
|
|
|
return None, value
|
|
|
|
|
|
return value, None
|
|
|
|
|
|
|
|
|
|
|
|
def _derive_responses_function_call_id(
|
|
|
|
|
|
self,
|
|
|
|
|
|
call_id: str,
|
|
|
|
|
|
response_item_id: Optional[str] = None,
|
|
|
|
|
|
) -> str:
|
|
|
|
|
|
"""Build a valid Responses `function_call.id` (must start with `fc_`)."""
|
|
|
|
|
|
if isinstance(response_item_id, str):
|
|
|
|
|
|
candidate = response_item_id.strip()
|
|
|
|
|
|
if candidate.startswith("fc_"):
|
|
|
|
|
|
return candidate
|
|
|
|
|
|
|
|
|
|
|
|
source = (call_id or "").strip()
|
|
|
|
|
|
if source.startswith("fc_"):
|
|
|
|
|
|
return source
|
|
|
|
|
|
if source.startswith("call_") and len(source) > len("call_"):
|
|
|
|
|
|
return f"fc_{source[len('call_'):]}"
|
|
|
|
|
|
|
|
|
|
|
|
sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source)
|
|
|
|
|
|
if sanitized.startswith("fc_"):
|
|
|
|
|
|
return sanitized
|
|
|
|
|
|
if sanitized.startswith("call_") and len(sanitized) > len("call_"):
|
|
|
|
|
|
return f"fc_{sanitized[len('call_'):]}"
|
|
|
|
|
|
if sanitized:
|
|
|
|
|
|
return f"fc_{sanitized[:48]}"
|
|
|
|
|
|
|
|
|
|
|
|
seed = source or str(response_item_id or "") or uuid.uuid4().hex
|
|
|
|
|
|
digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24]
|
|
|
|
|
|
return f"fc_{digest}"
|
|
|
|
|
|
|
2026-02-25 18:20:38 -08:00
|
|
|
|
def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
|
|
|
|
"""Convert internal chat-style messages to Responses input items."""
|
|
|
|
|
|
items: List[Dict[str, Any]] = []
|
|
|
|
|
|
|
|
|
|
|
|
for msg in messages:
|
|
|
|
|
|
if not isinstance(msg, dict):
|
|
|
|
|
|
continue
|
|
|
|
|
|
role = msg.get("role")
|
|
|
|
|
|
if role == "system":
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
if role in {"user", "assistant"}:
|
|
|
|
|
|
content = msg.get("content", "")
|
|
|
|
|
|
content_text = str(content) if content is not None else ""
|
|
|
|
|
|
|
|
|
|
|
|
if role == "assistant":
|
2026-02-28 21:47:51 -08:00
|
|
|
|
# Replay encrypted reasoning items from previous turns
|
|
|
|
|
|
# so the API can maintain coherent reasoning chains.
|
|
|
|
|
|
codex_reasoning = msg.get("codex_reasoning_items")
|
|
|
|
|
|
if isinstance(codex_reasoning, list):
|
|
|
|
|
|
for ri in codex_reasoning:
|
|
|
|
|
|
if isinstance(ri, dict) and ri.get("encrypted_content"):
|
|
|
|
|
|
items.append(ri)
|
|
|
|
|
|
|
2026-02-25 18:20:38 -08:00
|
|
|
|
if content_text.strip():
|
|
|
|
|
|
items.append({"role": "assistant", "content": content_text})
|
|
|
|
|
|
|
|
|
|
|
|
tool_calls = msg.get("tool_calls")
|
|
|
|
|
|
if isinstance(tool_calls, list):
|
|
|
|
|
|
for tc in tool_calls:
|
|
|
|
|
|
if not isinstance(tc, dict):
|
|
|
|
|
|
continue
|
|
|
|
|
|
fn = tc.get("function", {})
|
|
|
|
|
|
fn_name = fn.get("name")
|
|
|
|
|
|
if not isinstance(fn_name, str) or not fn_name.strip():
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
2026-02-25 18:11:26 -08:00
|
|
|
|
embedded_call_id, embedded_response_item_id = self._split_responses_tool_id(
|
|
|
|
|
|
tc.get("id")
|
|
|
|
|
|
)
|
|
|
|
|
|
call_id = tc.get("call_id")
|
|
|
|
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
|
|
|
|
call_id = embedded_call_id
|
2026-02-25 18:20:38 -08:00
|
|
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
2026-02-25 18:11:26 -08:00
|
|
|
|
if (
|
|
|
|
|
|
isinstance(embedded_response_item_id, str)
|
|
|
|
|
|
and embedded_response_item_id.startswith("fc_")
|
|
|
|
|
|
and len(embedded_response_item_id) > len("fc_")
|
|
|
|
|
|
):
|
|
|
|
|
|
call_id = f"call_{embedded_response_item_id[len('fc_'):]}"
|
|
|
|
|
|
else:
|
|
|
|
|
|
call_id = f"call_{uuid.uuid4().hex[:12]}"
|
|
|
|
|
|
call_id = call_id.strip()
|
|
|
|
|
|
|
2026-02-25 18:20:38 -08:00
|
|
|
|
arguments = fn.get("arguments", "{}")
|
|
|
|
|
|
if isinstance(arguments, dict):
|
|
|
|
|
|
arguments = json.dumps(arguments, ensure_ascii=False)
|
|
|
|
|
|
elif not isinstance(arguments, str):
|
|
|
|
|
|
arguments = str(arguments)
|
|
|
|
|
|
arguments = arguments.strip() or "{}"
|
|
|
|
|
|
|
|
|
|
|
|
items.append({
|
|
|
|
|
|
"type": "function_call",
|
|
|
|
|
|
"call_id": call_id,
|
|
|
|
|
|
"name": fn_name,
|
|
|
|
|
|
"arguments": arguments,
|
|
|
|
|
|
})
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
items.append({"role": role, "content": content_text})
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
if role == "tool":
|
2026-02-25 18:11:26 -08:00
|
|
|
|
raw_tool_call_id = msg.get("tool_call_id")
|
|
|
|
|
|
call_id, _ = self._split_responses_tool_id(raw_tool_call_id)
|
|
|
|
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
|
|
|
|
if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip():
|
|
|
|
|
|
call_id = raw_tool_call_id.strip()
|
2026-02-25 18:20:38 -08:00
|
|
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
|
|
|
|
continue
|
|
|
|
|
|
items.append({
|
|
|
|
|
|
"type": "function_call_output",
|
|
|
|
|
|
"call_id": call_id,
|
|
|
|
|
|
"output": str(msg.get("content", "") or ""),
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
return items
|
|
|
|
|
|
|
2026-02-25 19:27:54 -08:00
|
|
|
|
def _preflight_codex_input_items(self, raw_items: Any) -> List[Dict[str, Any]]:
|
|
|
|
|
|
if not isinstance(raw_items, list):
|
|
|
|
|
|
raise ValueError("Codex Responses input must be a list of input items.")
|
|
|
|
|
|
|
|
|
|
|
|
normalized: List[Dict[str, Any]] = []
|
|
|
|
|
|
for idx, item in enumerate(raw_items):
|
|
|
|
|
|
if not isinstance(item, dict):
|
|
|
|
|
|
raise ValueError(f"Codex Responses input[{idx}] must be an object.")
|
|
|
|
|
|
|
|
|
|
|
|
item_type = item.get("type")
|
|
|
|
|
|
if item_type == "function_call":
|
|
|
|
|
|
call_id = item.get("call_id")
|
|
|
|
|
|
name = item.get("name")
|
|
|
|
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
|
|
|
|
raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.")
|
|
|
|
|
|
if not isinstance(name, str) or not name.strip():
|
|
|
|
|
|
raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.")
|
|
|
|
|
|
|
|
|
|
|
|
arguments = item.get("arguments", "{}")
|
|
|
|
|
|
if isinstance(arguments, dict):
|
|
|
|
|
|
arguments = json.dumps(arguments, ensure_ascii=False)
|
|
|
|
|
|
elif not isinstance(arguments, str):
|
|
|
|
|
|
arguments = str(arguments)
|
|
|
|
|
|
arguments = arguments.strip() or "{}"
|
|
|
|
|
|
|
|
|
|
|
|
normalized.append(
|
|
|
|
|
|
{
|
|
|
|
|
|
"type": "function_call",
|
|
|
|
|
|
"call_id": call_id.strip(),
|
|
|
|
|
|
"name": name.strip(),
|
|
|
|
|
|
"arguments": arguments,
|
|
|
|
|
|
}
|
|
|
|
|
|
)
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
if item_type == "function_call_output":
|
|
|
|
|
|
call_id = item.get("call_id")
|
|
|
|
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
|
|
|
|
raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.")
|
|
|
|
|
|
output = item.get("output", "")
|
|
|
|
|
|
if output is None:
|
|
|
|
|
|
output = ""
|
|
|
|
|
|
if not isinstance(output, str):
|
|
|
|
|
|
output = str(output)
|
|
|
|
|
|
|
|
|
|
|
|
normalized.append(
|
|
|
|
|
|
{
|
|
|
|
|
|
"type": "function_call_output",
|
|
|
|
|
|
"call_id": call_id.strip(),
|
|
|
|
|
|
"output": output,
|
|
|
|
|
|
}
|
|
|
|
|
|
)
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
2026-03-01 16:15:20 -08:00
|
|
|
|
if item_type == "reasoning":
|
|
|
|
|
|
encrypted = item.get("encrypted_content")
|
|
|
|
|
|
if isinstance(encrypted, str) and encrypted:
|
|
|
|
|
|
reasoning_item = {"type": "reasoning", "encrypted_content": encrypted}
|
|
|
|
|
|
item_id = item.get("id")
|
|
|
|
|
|
if isinstance(item_id, str) and item_id:
|
|
|
|
|
|
reasoning_item["id"] = item_id
|
2026-03-01 20:03:03 -08:00
|
|
|
|
summary = item.get("summary")
|
|
|
|
|
|
if isinstance(summary, list):
|
|
|
|
|
|
reasoning_item["summary"] = summary
|
|
|
|
|
|
else:
|
|
|
|
|
|
reasoning_item["summary"] = []
|
2026-03-01 16:15:20 -08:00
|
|
|
|
normalized.append(reasoning_item)
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
2026-02-25 19:27:54 -08:00
|
|
|
|
role = item.get("role")
|
|
|
|
|
|
if role in {"user", "assistant"}:
|
|
|
|
|
|
content = item.get("content", "")
|
|
|
|
|
|
if content is None:
|
|
|
|
|
|
content = ""
|
|
|
|
|
|
if not isinstance(content, str):
|
|
|
|
|
|
content = str(content)
|
|
|
|
|
|
|
|
|
|
|
|
normalized.append({"role": role, "content": content})
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
raise ValueError(
|
|
|
|
|
|
f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})."
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
return normalized
|
|
|
|
|
|
|
|
|
|
|
|
def _preflight_codex_api_kwargs(
|
|
|
|
|
|
self,
|
|
|
|
|
|
api_kwargs: Any,
|
|
|
|
|
|
*,
|
|
|
|
|
|
allow_stream: bool = False,
|
|
|
|
|
|
) -> Dict[str, Any]:
|
|
|
|
|
|
if not isinstance(api_kwargs, dict):
|
|
|
|
|
|
raise ValueError("Codex Responses request must be a dict.")
|
|
|
|
|
|
|
|
|
|
|
|
required = {"model", "instructions", "input"}
|
|
|
|
|
|
missing = [key for key in required if key not in api_kwargs]
|
|
|
|
|
|
if missing:
|
|
|
|
|
|
raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.")
|
|
|
|
|
|
|
|
|
|
|
|
model = api_kwargs.get("model")
|
|
|
|
|
|
if not isinstance(model, str) or not model.strip():
|
|
|
|
|
|
raise ValueError("Codex Responses request 'model' must be a non-empty string.")
|
|
|
|
|
|
model = model.strip()
|
|
|
|
|
|
|
|
|
|
|
|
instructions = api_kwargs.get("instructions")
|
|
|
|
|
|
if instructions is None:
|
|
|
|
|
|
instructions = ""
|
|
|
|
|
|
if not isinstance(instructions, str):
|
|
|
|
|
|
instructions = str(instructions)
|
|
|
|
|
|
instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY
|
|
|
|
|
|
|
|
|
|
|
|
normalized_input = self._preflight_codex_input_items(api_kwargs.get("input"))
|
|
|
|
|
|
|
|
|
|
|
|
tools = api_kwargs.get("tools")
|
|
|
|
|
|
normalized_tools = None
|
|
|
|
|
|
if tools is not None:
|
|
|
|
|
|
if not isinstance(tools, list):
|
|
|
|
|
|
raise ValueError("Codex Responses request 'tools' must be a list when provided.")
|
|
|
|
|
|
normalized_tools = []
|
|
|
|
|
|
for idx, tool in enumerate(tools):
|
|
|
|
|
|
if not isinstance(tool, dict):
|
|
|
|
|
|
raise ValueError(f"Codex Responses tools[{idx}] must be an object.")
|
|
|
|
|
|
if tool.get("type") != "function":
|
|
|
|
|
|
raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.")
|
|
|
|
|
|
|
|
|
|
|
|
name = tool.get("name")
|
|
|
|
|
|
parameters = tool.get("parameters")
|
|
|
|
|
|
if not isinstance(name, str) or not name.strip():
|
|
|
|
|
|
raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.")
|
|
|
|
|
|
if not isinstance(parameters, dict):
|
|
|
|
|
|
raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.")
|
|
|
|
|
|
|
|
|
|
|
|
description = tool.get("description", "")
|
|
|
|
|
|
if description is None:
|
|
|
|
|
|
description = ""
|
|
|
|
|
|
if not isinstance(description, str):
|
|
|
|
|
|
description = str(description)
|
|
|
|
|
|
|
|
|
|
|
|
strict = tool.get("strict", False)
|
|
|
|
|
|
if not isinstance(strict, bool):
|
|
|
|
|
|
strict = bool(strict)
|
|
|
|
|
|
|
|
|
|
|
|
normalized_tools.append(
|
|
|
|
|
|
{
|
|
|
|
|
|
"type": "function",
|
|
|
|
|
|
"name": name.strip(),
|
|
|
|
|
|
"description": description,
|
|
|
|
|
|
"strict": strict,
|
|
|
|
|
|
"parameters": parameters,
|
|
|
|
|
|
}
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
store = api_kwargs.get("store", False)
|
|
|
|
|
|
if store is not False:
|
|
|
|
|
|
raise ValueError("Codex Responses contract requires 'store' to be false.")
|
|
|
|
|
|
|
2026-02-28 21:47:51 -08:00
|
|
|
|
allowed_keys = {
|
|
|
|
|
|
"model", "instructions", "input", "tools", "store",
|
|
|
|
|
|
"reasoning", "include", "max_output_tokens", "temperature",
|
|
|
|
|
|
}
|
2026-02-25 19:27:54 -08:00
|
|
|
|
normalized: Dict[str, Any] = {
|
|
|
|
|
|
"model": model,
|
|
|
|
|
|
"instructions": instructions,
|
|
|
|
|
|
"input": normalized_input,
|
|
|
|
|
|
"tools": normalized_tools,
|
|
|
|
|
|
"store": False,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-28 21:47:51 -08:00
|
|
|
|
# Pass through reasoning config
|
|
|
|
|
|
reasoning = api_kwargs.get("reasoning")
|
|
|
|
|
|
if isinstance(reasoning, dict):
|
|
|
|
|
|
normalized["reasoning"] = reasoning
|
|
|
|
|
|
include = api_kwargs.get("include")
|
|
|
|
|
|
if isinstance(include, list):
|
|
|
|
|
|
normalized["include"] = include
|
|
|
|
|
|
|
|
|
|
|
|
# Pass through max_output_tokens and temperature
|
|
|
|
|
|
max_output_tokens = api_kwargs.get("max_output_tokens")
|
|
|
|
|
|
if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0:
|
|
|
|
|
|
normalized["max_output_tokens"] = int(max_output_tokens)
|
|
|
|
|
|
temperature = api_kwargs.get("temperature")
|
|
|
|
|
|
if isinstance(temperature, (int, float)):
|
|
|
|
|
|
normalized["temperature"] = float(temperature)
|
|
|
|
|
|
|
2026-02-25 19:27:54 -08:00
|
|
|
|
if allow_stream:
|
|
|
|
|
|
stream = api_kwargs.get("stream")
|
|
|
|
|
|
if stream is not None and stream is not True:
|
|
|
|
|
|
raise ValueError("Codex Responses 'stream' must be true when set.")
|
|
|
|
|
|
if stream is True:
|
|
|
|
|
|
normalized["stream"] = True
|
|
|
|
|
|
allowed_keys.add("stream")
|
|
|
|
|
|
elif "stream" in api_kwargs:
|
|
|
|
|
|
raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.")
|
|
|
|
|
|
|
|
|
|
|
|
unexpected = sorted(key for key in api_kwargs.keys() if key not in allowed_keys)
|
|
|
|
|
|
if unexpected:
|
|
|
|
|
|
raise ValueError(
|
|
|
|
|
|
f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}."
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
return normalized
|
|
|
|
|
|
|
2026-02-25 18:20:38 -08:00
|
|
|
|
def _extract_responses_message_text(self, item: Any) -> str:
|
|
|
|
|
|
"""Extract assistant text from a Responses message output item."""
|
|
|
|
|
|
content = getattr(item, "content", None)
|
|
|
|
|
|
if not isinstance(content, list):
|
|
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
|
|
|
chunks: List[str] = []
|
|
|
|
|
|
for part in content:
|
|
|
|
|
|
ptype = getattr(part, "type", None)
|
|
|
|
|
|
if ptype not in {"output_text", "text"}:
|
|
|
|
|
|
continue
|
|
|
|
|
|
text = getattr(part, "text", None)
|
|
|
|
|
|
if isinstance(text, str) and text:
|
|
|
|
|
|
chunks.append(text)
|
|
|
|
|
|
return "".join(chunks).strip()
|
|
|
|
|
|
|
|
|
|
|
|
def _extract_responses_reasoning_text(self, item: Any) -> str:
|
|
|
|
|
|
"""Extract a compact reasoning text from a Responses reasoning item."""
|
|
|
|
|
|
summary = getattr(item, "summary", None)
|
|
|
|
|
|
if isinstance(summary, list):
|
|
|
|
|
|
chunks: List[str] = []
|
|
|
|
|
|
for part in summary:
|
|
|
|
|
|
text = getattr(part, "text", None)
|
|
|
|
|
|
if isinstance(text, str) and text:
|
|
|
|
|
|
chunks.append(text)
|
|
|
|
|
|
if chunks:
|
|
|
|
|
|
return "\n".join(chunks).strip()
|
|
|
|
|
|
text = getattr(item, "text", None)
|
|
|
|
|
|
if isinstance(text, str) and text:
|
|
|
|
|
|
return text.strip()
|
|
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
|
|
|
def _normalize_codex_response(self, response: Any) -> tuple[Any, str]:
|
|
|
|
|
|
"""Normalize a Responses API object to an assistant_message-like object."""
|
|
|
|
|
|
output = getattr(response, "output", None)
|
|
|
|
|
|
if not isinstance(output, list) or not output:
|
|
|
|
|
|
raise RuntimeError("Responses API returned no output items")
|
|
|
|
|
|
|
|
|
|
|
|
response_status = getattr(response, "status", None)
|
|
|
|
|
|
if isinstance(response_status, str):
|
|
|
|
|
|
response_status = response_status.strip().lower()
|
|
|
|
|
|
else:
|
|
|
|
|
|
response_status = None
|
|
|
|
|
|
|
|
|
|
|
|
if response_status in {"failed", "cancelled"}:
|
|
|
|
|
|
error_obj = getattr(response, "error", None)
|
|
|
|
|
|
if isinstance(error_obj, dict):
|
|
|
|
|
|
error_msg = error_obj.get("message") or str(error_obj)
|
|
|
|
|
|
else:
|
|
|
|
|
|
error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'"
|
|
|
|
|
|
raise RuntimeError(error_msg)
|
|
|
|
|
|
|
|
|
|
|
|
content_parts: List[str] = []
|
|
|
|
|
|
reasoning_parts: List[str] = []
|
2026-02-28 21:47:51 -08:00
|
|
|
|
reasoning_items_raw: List[Dict[str, Any]] = []
|
2026-02-25 18:20:38 -08:00
|
|
|
|
tool_calls: List[Any] = []
|
|
|
|
|
|
has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
|
2026-02-25 18:11:26 -08:00
|
|
|
|
saw_commentary_phase = False
|
|
|
|
|
|
saw_final_answer_phase = False
|
2026-02-25 18:20:38 -08:00
|
|
|
|
|
|
|
|
|
|
for item in output:
|
|
|
|
|
|
item_type = getattr(item, "type", None)
|
|
|
|
|
|
item_status = getattr(item, "status", None)
|
|
|
|
|
|
if isinstance(item_status, str):
|
|
|
|
|
|
item_status = item_status.strip().lower()
|
|
|
|
|
|
else:
|
|
|
|
|
|
item_status = None
|
|
|
|
|
|
|
|
|
|
|
|
if item_status in {"queued", "in_progress", "incomplete"}:
|
|
|
|
|
|
has_incomplete_items = True
|
|
|
|
|
|
|
|
|
|
|
|
if item_type == "message":
|
2026-02-25 18:11:26 -08:00
|
|
|
|
item_phase = getattr(item, "phase", None)
|
|
|
|
|
|
if isinstance(item_phase, str):
|
|
|
|
|
|
normalized_phase = item_phase.strip().lower()
|
|
|
|
|
|
if normalized_phase in {"commentary", "analysis"}:
|
|
|
|
|
|
saw_commentary_phase = True
|
|
|
|
|
|
elif normalized_phase in {"final_answer", "final"}:
|
|
|
|
|
|
saw_final_answer_phase = True
|
2026-02-25 18:20:38 -08:00
|
|
|
|
message_text = self._extract_responses_message_text(item)
|
|
|
|
|
|
if message_text:
|
|
|
|
|
|
content_parts.append(message_text)
|
|
|
|
|
|
elif item_type == "reasoning":
|
|
|
|
|
|
reasoning_text = self._extract_responses_reasoning_text(item)
|
|
|
|
|
|
if reasoning_text:
|
|
|
|
|
|
reasoning_parts.append(reasoning_text)
|
2026-02-28 21:47:51 -08:00
|
|
|
|
# Capture the full reasoning item for multi-turn continuity.
|
|
|
|
|
|
# encrypted_content is an opaque blob the API needs back on
|
|
|
|
|
|
# subsequent turns to maintain coherent reasoning chains.
|
|
|
|
|
|
encrypted = getattr(item, "encrypted_content", None)
|
|
|
|
|
|
if isinstance(encrypted, str) and encrypted:
|
|
|
|
|
|
raw_item = {"type": "reasoning", "encrypted_content": encrypted}
|
|
|
|
|
|
item_id = getattr(item, "id", None)
|
|
|
|
|
|
if isinstance(item_id, str) and item_id:
|
|
|
|
|
|
raw_item["id"] = item_id
|
2026-03-01 20:03:03 -08:00
|
|
|
|
# Capture summary — required by the API when replaying reasoning items
|
|
|
|
|
|
summary = getattr(item, "summary", None)
|
|
|
|
|
|
if isinstance(summary, list):
|
|
|
|
|
|
raw_summary = []
|
|
|
|
|
|
for part in summary:
|
|
|
|
|
|
text = getattr(part, "text", None)
|
|
|
|
|
|
if isinstance(text, str):
|
|
|
|
|
|
raw_summary.append({"type": "summary_text", "text": text})
|
|
|
|
|
|
raw_item["summary"] = raw_summary
|
2026-02-28 21:47:51 -08:00
|
|
|
|
reasoning_items_raw.append(raw_item)
|
2026-02-25 18:20:38 -08:00
|
|
|
|
elif item_type == "function_call":
|
|
|
|
|
|
if item_status in {"queued", "in_progress", "incomplete"}:
|
|
|
|
|
|
continue
|
|
|
|
|
|
fn_name = getattr(item, "name", "") or ""
|
|
|
|
|
|
arguments = getattr(item, "arguments", "{}")
|
|
|
|
|
|
if not isinstance(arguments, str):
|
|
|
|
|
|
arguments = str(arguments)
|
2026-02-25 18:11:26 -08:00
|
|
|
|
raw_call_id = getattr(item, "call_id", None)
|
|
|
|
|
|
raw_item_id = getattr(item, "id", None)
|
|
|
|
|
|
embedded_call_id, _ = self._split_responses_tool_id(raw_item_id)
|
|
|
|
|
|
call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
|
|
|
|
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
|
|
|
|
call_id = f"call_{uuid.uuid4().hex[:12]}"
|
|
|
|
|
|
call_id = call_id.strip()
|
|
|
|
|
|
response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
|
|
|
|
|
|
response_item_id = self._derive_responses_function_call_id(call_id, response_item_id)
|
2026-02-25 18:20:38 -08:00
|
|
|
|
tool_calls.append(SimpleNamespace(
|
|
|
|
|
|
id=call_id,
|
2026-02-25 18:11:26 -08:00
|
|
|
|
call_id=call_id,
|
|
|
|
|
|
response_item_id=response_item_id,
|
2026-02-25 18:20:38 -08:00
|
|
|
|
type="function",
|
|
|
|
|
|
function=SimpleNamespace(name=fn_name, arguments=arguments),
|
|
|
|
|
|
))
|
|
|
|
|
|
elif item_type == "custom_tool_call":
|
|
|
|
|
|
fn_name = getattr(item, "name", "") or ""
|
|
|
|
|
|
arguments = getattr(item, "input", "{}")
|
|
|
|
|
|
if not isinstance(arguments, str):
|
|
|
|
|
|
arguments = str(arguments)
|
2026-02-25 18:11:26 -08:00
|
|
|
|
raw_call_id = getattr(item, "call_id", None)
|
|
|
|
|
|
raw_item_id = getattr(item, "id", None)
|
|
|
|
|
|
embedded_call_id, _ = self._split_responses_tool_id(raw_item_id)
|
|
|
|
|
|
call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
|
|
|
|
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
|
|
|
|
call_id = f"call_{uuid.uuid4().hex[:12]}"
|
|
|
|
|
|
call_id = call_id.strip()
|
|
|
|
|
|
response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
|
|
|
|
|
|
response_item_id = self._derive_responses_function_call_id(call_id, response_item_id)
|
2026-02-25 18:20:38 -08:00
|
|
|
|
tool_calls.append(SimpleNamespace(
|
|
|
|
|
|
id=call_id,
|
2026-02-25 18:11:26 -08:00
|
|
|
|
call_id=call_id,
|
|
|
|
|
|
response_item_id=response_item_id,
|
2026-02-25 18:20:38 -08:00
|
|
|
|
type="function",
|
|
|
|
|
|
function=SimpleNamespace(name=fn_name, arguments=arguments),
|
|
|
|
|
|
))
|
|
|
|
|
|
|
|
|
|
|
|
final_text = "\n".join([p for p in content_parts if p]).strip()
|
|
|
|
|
|
if not final_text and hasattr(response, "output_text"):
|
|
|
|
|
|
out_text = getattr(response, "output_text", "")
|
|
|
|
|
|
if isinstance(out_text, str):
|
|
|
|
|
|
final_text = out_text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
assistant_message = SimpleNamespace(
|
|
|
|
|
|
content=final_text,
|
|
|
|
|
|
tool_calls=tool_calls,
|
|
|
|
|
|
reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None,
|
|
|
|
|
|
reasoning_content=None,
|
|
|
|
|
|
reasoning_details=None,
|
2026-02-28 21:47:51 -08:00
|
|
|
|
codex_reasoning_items=reasoning_items_raw or None,
|
2026-02-25 18:20:38 -08:00
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
if tool_calls:
|
|
|
|
|
|
finish_reason = "tool_calls"
|
2026-02-25 18:11:26 -08:00
|
|
|
|
elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
|
2026-02-25 18:20:38 -08:00
|
|
|
|
finish_reason = "incomplete"
|
|
|
|
|
|
else:
|
|
|
|
|
|
finish_reason = "stop"
|
|
|
|
|
|
return assistant_message, finish_reason
|
|
|
|
|
|
|
|
|
|
|
|
def _run_codex_stream(self, api_kwargs: dict):
|
|
|
|
|
|
"""Execute one streaming Responses API request and return the final response."""
|
2026-02-25 18:56:06 -08:00
|
|
|
|
max_stream_retries = 1
|
|
|
|
|
|
for attempt in range(max_stream_retries + 1):
|
|
|
|
|
|
try:
|
|
|
|
|
|
with self.client.responses.stream(**api_kwargs) as stream:
|
|
|
|
|
|
for _ in stream:
|
|
|
|
|
|
pass
|
|
|
|
|
|
return stream.get_final_response()
|
|
|
|
|
|
except RuntimeError as exc:
|
|
|
|
|
|
err_text = str(exc)
|
|
|
|
|
|
missing_completed = "response.completed" in err_text
|
|
|
|
|
|
if missing_completed and attempt < max_stream_retries:
|
|
|
|
|
|
logger.debug(
|
|
|
|
|
|
"Responses stream closed before completion (attempt %s/%s); retrying.",
|
|
|
|
|
|
attempt + 1,
|
|
|
|
|
|
max_stream_retries + 1,
|
|
|
|
|
|
)
|
|
|
|
|
|
continue
|
|
|
|
|
|
if missing_completed:
|
|
|
|
|
|
logger.debug(
|
2026-02-25 19:08:11 -08:00
|
|
|
|
"Responses stream did not emit response.completed; falling back to create(stream=True)."
|
2026-02-25 18:56:06 -08:00
|
|
|
|
)
|
2026-02-25 19:08:11 -08:00
|
|
|
|
return self._run_codex_create_stream_fallback(api_kwargs)
|
2026-02-25 18:56:06 -08:00
|
|
|
|
raise
|
2026-02-25 18:20:38 -08:00
|
|
|
|
|
2026-02-25 19:08:11 -08:00
|
|
|
|
def _run_codex_create_stream_fallback(self, api_kwargs: dict):
|
|
|
|
|
|
"""Fallback path for stream completion edge cases on Codex-style Responses backends."""
|
|
|
|
|
|
fallback_kwargs = dict(api_kwargs)
|
|
|
|
|
|
fallback_kwargs["stream"] = True
|
2026-02-25 19:27:54 -08:00
|
|
|
|
fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True)
|
2026-02-25 19:08:11 -08:00
|
|
|
|
stream_or_response = self.client.responses.create(**fallback_kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
# Compatibility shim for mocks or providers that still return a concrete response.
|
|
|
|
|
|
if hasattr(stream_or_response, "output"):
|
|
|
|
|
|
return stream_or_response
|
|
|
|
|
|
if not hasattr(stream_or_response, "__iter__"):
|
|
|
|
|
|
return stream_or_response
|
|
|
|
|
|
|
|
|
|
|
|
terminal_response = None
|
|
|
|
|
|
try:
|
|
|
|
|
|
for event in stream_or_response:
|
|
|
|
|
|
event_type = getattr(event, "type", None)
|
|
|
|
|
|
if not event_type and isinstance(event, dict):
|
|
|
|
|
|
event_type = event.get("type")
|
|
|
|
|
|
if event_type not in {"response.completed", "response.incomplete", "response.failed"}:
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
terminal_response = getattr(event, "response", None)
|
|
|
|
|
|
if terminal_response is None and isinstance(event, dict):
|
|
|
|
|
|
terminal_response = event.get("response")
|
|
|
|
|
|
if terminal_response is not None:
|
|
|
|
|
|
return terminal_response
|
|
|
|
|
|
finally:
|
|
|
|
|
|
close_fn = getattr(stream_or_response, "close", None)
|
|
|
|
|
|
if callable(close_fn):
|
|
|
|
|
|
try:
|
|
|
|
|
|
close_fn()
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
if terminal_response is not None:
|
|
|
|
|
|
return terminal_response
|
|
|
|
|
|
raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.")
|
|
|
|
|
|
|
2026-02-25 19:27:54 -08:00
|
|
|
|
def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
|
|
|
|
|
|
if self.api_mode != "codex_responses" or self.provider != "openai-codex":
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
from hermes_cli.auth import resolve_codex_runtime_credentials
|
|
|
|
|
|
|
|
|
|
|
|
creds = resolve_codex_runtime_credentials(force_refresh=force)
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
logger.debug("Codex credential refresh failed: %s", exc)
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
api_key = creds.get("api_key")
|
|
|
|
|
|
base_url = creds.get("base_url")
|
|
|
|
|
|
if not isinstance(api_key, str) or not api_key.strip():
|
|
|
|
|
|
return False
|
|
|
|
|
|
if not isinstance(base_url, str) or not base_url.strip():
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
self.api_key = api_key.strip()
|
|
|
|
|
|
self.base_url = base_url.strip().rstrip("/")
|
|
|
|
|
|
self._client_kwargs["api_key"] = self.api_key
|
|
|
|
|
|
self._client_kwargs["base_url"] = self.base_url
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
self.client.close()
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
self.client = OpenAI(**self._client_kwargs)
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
logger.warning("Failed to rebuild OpenAI client after Codex refresh: %s", exc)
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
2026-03-07 13:34:23 +11:00
|
|
|
|
def _try_refresh_nous_client_credentials(self, *, force: bool = True) -> bool:
|
|
|
|
|
|
if self.api_mode != "chat_completions" or self.provider != "nous":
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
from hermes_cli.auth import resolve_nous_runtime_credentials
|
|
|
|
|
|
|
|
|
|
|
|
creds = resolve_nous_runtime_credentials(
|
|
|
|
|
|
min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
|
|
|
|
|
|
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
|
|
|
|
|
|
force_mint=force,
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
logger.debug("Nous credential refresh failed: %s", exc)
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
api_key = creds.get("api_key")
|
|
|
|
|
|
base_url = creds.get("base_url")
|
|
|
|
|
|
if not isinstance(api_key, str) or not api_key.strip():
|
|
|
|
|
|
return False
|
|
|
|
|
|
if not isinstance(base_url, str) or not base_url.strip():
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
self.api_key = api_key.strip()
|
|
|
|
|
|
self.base_url = base_url.strip().rstrip("/")
|
|
|
|
|
|
self._client_kwargs["api_key"] = self.api_key
|
|
|
|
|
|
self._client_kwargs["base_url"] = self.base_url
|
|
|
|
|
|
# Nous requests should not inherit OpenRouter-only attribution headers.
|
|
|
|
|
|
self._client_kwargs.pop("default_headers", None)
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
self.client.close()
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
self.client = OpenAI(**self._client_kwargs)
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
|
logger.warning("Failed to rebuild OpenAI client after Nous refresh: %s", exc)
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
2026-02-21 12:15:40 -08:00
|
|
|
|
def _interruptible_api_call(self, api_kwargs: dict):
|
|
|
|
|
|
"""
|
|
|
|
|
|
Run the API call in a background thread so the main conversation loop
|
|
|
|
|
|
can detect interrupts without waiting for the full HTTP round-trip.
|
|
|
|
|
|
|
2026-02-23 02:11:33 -08:00
|
|
|
|
On interrupt, closes the HTTP client to cancel the in-flight request
|
|
|
|
|
|
(stops token generation and avoids wasting money), then rebuilds the
|
|
|
|
|
|
client for future calls.
|
2026-02-21 12:15:40 -08:00
|
|
|
|
"""
|
|
|
|
|
|
result = {"response": None, "error": None}
|
|
|
|
|
|
|
|
|
|
|
|
def _call():
|
|
|
|
|
|
try:
|
2026-02-25 18:20:38 -08:00
|
|
|
|
if self.api_mode == "codex_responses":
|
|
|
|
|
|
result["response"] = self._run_codex_stream(api_kwargs)
|
|
|
|
|
|
else:
|
|
|
|
|
|
result["response"] = self.client.chat.completions.create(**api_kwargs)
|
2026-02-21 12:15:40 -08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
result["error"] = e
|
|
|
|
|
|
|
|
|
|
|
|
t = threading.Thread(target=_call, daemon=True)
|
|
|
|
|
|
t.start()
|
|
|
|
|
|
while t.is_alive():
|
|
|
|
|
|
t.join(timeout=0.3)
|
|
|
|
|
|
if self._interrupt_requested:
|
2026-02-23 02:11:33 -08:00
|
|
|
|
# Force-close the HTTP connection to stop token generation
|
|
|
|
|
|
try:
|
|
|
|
|
|
self.client.close()
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
# Rebuild the client for future calls (cheap, no network)
|
|
|
|
|
|
try:
|
|
|
|
|
|
self.client = OpenAI(**self._client_kwargs)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
2026-02-21 12:15:40 -08:00
|
|
|
|
raise InterruptedError("Agent interrupted during API call")
|
|
|
|
|
|
if result["error"] is not None:
|
|
|
|
|
|
raise result["error"]
|
|
|
|
|
|
return result["response"]
|
|
|
|
|
|
|
feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
2026-03-08 20:22:33 -07:00
|
|
|
|
# ── Provider fallback ──────────────────────────────────────────────────
|
|
|
|
|
|
|
refactor: unified OAuth/API-key credential resolution for fallback
Split fallback provider handling into two clean registries:
_FALLBACK_API_KEY_PROVIDERS — env-var-based (openrouter, zai, kimi, minimax)
_FALLBACK_OAUTH_PROVIDERS — OAuth-based (openai-codex, nous)
New _resolve_fallback_credentials() method handles all three cases
(OAuth, API key, custom endpoint) and returns a uniform (key, url, mode)
tuple. _try_activate_fallback() is now just validation + client build.
Adds Nous Portal as a fallback provider — uses the same OAuth flow
as the primary provider (hermes login), returns chat_completions mode.
OAuth providers get credential refresh for free: the existing 401
retry handlers (_try_refresh_codex/nous_client_credentials) check
self.provider, which is set correctly after fallback activation.
4 new tests (nous activation, nous no-login, codex retained).
27 total fallback tests passing, 2548 full suite.
2026-03-08 21:44:48 -07:00
|
|
|
|
# API-key providers: provider → (base_url, [env_var_names])
|
|
|
|
|
|
_FALLBACK_API_KEY_PROVIDERS = {
|
feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
2026-03-08 20:22:33 -07:00
|
|
|
|
"openrouter": (OPENROUTER_BASE_URL, ["OPENROUTER_API_KEY"]),
|
2026-03-08 20:49:55 -07:00
|
|
|
|
"zai": ("https://api.z.ai/api/paas/v4", ["ZAI_API_KEY", "Z_AI_API_KEY"]),
|
|
|
|
|
|
"kimi-coding": ("https://api.moonshot.ai/v1", ["KIMI_API_KEY"]),
|
|
|
|
|
|
"minimax": ("https://api.minimax.io/v1", ["MINIMAX_API_KEY"]),
|
|
|
|
|
|
"minimax-cn": ("https://api.minimaxi.com/v1", ["MINIMAX_CN_API_KEY"]),
|
feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
2026-03-08 20:22:33 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
refactor: unified OAuth/API-key credential resolution for fallback
Split fallback provider handling into two clean registries:
_FALLBACK_API_KEY_PROVIDERS — env-var-based (openrouter, zai, kimi, minimax)
_FALLBACK_OAUTH_PROVIDERS — OAuth-based (openai-codex, nous)
New _resolve_fallback_credentials() method handles all three cases
(OAuth, API key, custom endpoint) and returns a uniform (key, url, mode)
tuple. _try_activate_fallback() is now just validation + client build.
Adds Nous Portal as a fallback provider — uses the same OAuth flow
as the primary provider (hermes login), returns chat_completions mode.
OAuth providers get credential refresh for free: the existing 401
retry handlers (_try_refresh_codex/nous_client_credentials) check
self.provider, which is set correctly after fallback activation.
4 new tests (nous activation, nous no-login, codex retained).
27 total fallback tests passing, 2548 full suite.
2026-03-08 21:44:48 -07:00
|
|
|
|
# OAuth providers: provider → (resolver_import_path, api_mode)
|
|
|
|
|
|
# Each resolver returns {"api_key": ..., "base_url": ...}.
|
|
|
|
|
|
_FALLBACK_OAUTH_PROVIDERS = {
|
|
|
|
|
|
"openai-codex": ("resolve_codex_runtime_credentials", "codex_responses"),
|
|
|
|
|
|
"nous": ("resolve_nous_runtime_credentials", "chat_completions"),
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
def _resolve_fallback_credentials(
|
|
|
|
|
|
self, fb_provider: str, fb_config: dict
|
|
|
|
|
|
) -> Optional[tuple]:
|
|
|
|
|
|
"""Resolve credentials for a fallback provider.
|
|
|
|
|
|
|
|
|
|
|
|
Returns (api_key, base_url, api_mode) on success, or None on failure.
|
|
|
|
|
|
Handles three cases:
|
|
|
|
|
|
1. OAuth providers (openai-codex, nous) — call credential resolver
|
|
|
|
|
|
2. API-key providers (openrouter, zai, etc.) — read env var
|
|
|
|
|
|
3. Custom endpoints — use base_url + api_key_env from config
|
|
|
|
|
|
"""
|
|
|
|
|
|
# ── 1. OAuth providers ────────────────────────────────────────
|
|
|
|
|
|
if fb_provider in self._FALLBACK_OAUTH_PROVIDERS:
|
|
|
|
|
|
resolver_name, api_mode = self._FALLBACK_OAUTH_PROVIDERS[fb_provider]
|
|
|
|
|
|
try:
|
|
|
|
|
|
import hermes_cli.auth as _auth
|
|
|
|
|
|
resolver = getattr(_auth, resolver_name)
|
|
|
|
|
|
creds = resolver()
|
|
|
|
|
|
return creds["api_key"], creds["base_url"], api_mode
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logging.warning(
|
|
|
|
|
|
"Fallback to %s failed (credential resolution): %s",
|
|
|
|
|
|
fb_provider, e,
|
|
|
|
|
|
)
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
# ── 2. API-key providers ──────────────────────────────────────
|
|
|
|
|
|
fb_key = (fb_config.get("api_key") or "").strip()
|
|
|
|
|
|
if not fb_key:
|
|
|
|
|
|
key_env = (fb_config.get("api_key_env") or "").strip()
|
|
|
|
|
|
if key_env:
|
|
|
|
|
|
fb_key = os.getenv(key_env, "")
|
|
|
|
|
|
elif fb_provider in self._FALLBACK_API_KEY_PROVIDERS:
|
|
|
|
|
|
for env_var in self._FALLBACK_API_KEY_PROVIDERS[fb_provider][1]:
|
|
|
|
|
|
fb_key = os.getenv(env_var, "")
|
|
|
|
|
|
if fb_key:
|
|
|
|
|
|
break
|
|
|
|
|
|
if not fb_key:
|
|
|
|
|
|
logging.warning(
|
|
|
|
|
|
"Fallback model configured but no API key found for provider '%s'",
|
|
|
|
|
|
fb_provider,
|
|
|
|
|
|
)
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
# ── 3. Resolve base URL ───────────────────────────────────────
|
|
|
|
|
|
fb_base_url = (fb_config.get("base_url") or "").strip()
|
|
|
|
|
|
if not fb_base_url and fb_provider in self._FALLBACK_API_KEY_PROVIDERS:
|
|
|
|
|
|
fb_base_url = self._FALLBACK_API_KEY_PROVIDERS[fb_provider][0]
|
|
|
|
|
|
if not fb_base_url:
|
|
|
|
|
|
fb_base_url = OPENROUTER_BASE_URL
|
|
|
|
|
|
|
|
|
|
|
|
return fb_key, fb_base_url, "chat_completions"
|
|
|
|
|
|
|
feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
2026-03-08 20:22:33 -07:00
|
|
|
|
def _try_activate_fallback(self) -> bool:
|
|
|
|
|
|
"""Switch to the configured fallback model/provider.
|
|
|
|
|
|
|
|
|
|
|
|
Called when the primary model is failing after retries. Swaps the
|
|
|
|
|
|
OpenAI client, model slug, and provider in-place so the retry loop
|
|
|
|
|
|
can continue with the new backend. One-shot: returns False if
|
|
|
|
|
|
already activated or not configured.
|
|
|
|
|
|
"""
|
|
|
|
|
|
if self._fallback_activated or not self._fallback_model:
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
fb = self._fallback_model
|
|
|
|
|
|
fb_provider = (fb.get("provider") or "").strip().lower()
|
|
|
|
|
|
fb_model = (fb.get("model") or "").strip()
|
|
|
|
|
|
if not fb_provider or not fb_model:
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
refactor: unified OAuth/API-key credential resolution for fallback
Split fallback provider handling into two clean registries:
_FALLBACK_API_KEY_PROVIDERS — env-var-based (openrouter, zai, kimi, minimax)
_FALLBACK_OAUTH_PROVIDERS — OAuth-based (openai-codex, nous)
New _resolve_fallback_credentials() method handles all three cases
(OAuth, API key, custom endpoint) and returns a uniform (key, url, mode)
tuple. _try_activate_fallback() is now just validation + client build.
Adds Nous Portal as a fallback provider — uses the same OAuth flow
as the primary provider (hermes login), returns chat_completions mode.
OAuth providers get credential refresh for free: the existing 401
retry handlers (_try_refresh_codex/nous_client_credentials) check
self.provider, which is set correctly after fallback activation.
4 new tests (nous activation, nous no-login, codex retained).
27 total fallback tests passing, 2548 full suite.
2026-03-08 21:44:48 -07:00
|
|
|
|
resolved = self._resolve_fallback_credentials(fb_provider, fb)
|
|
|
|
|
|
if resolved is None:
|
|
|
|
|
|
return False
|
|
|
|
|
|
fb_key, fb_base_url, fb_api_mode = resolved
|
feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
2026-03-08 20:22:33 -07:00
|
|
|
|
|
refactor: unified OAuth/API-key credential resolution for fallback
Split fallback provider handling into two clean registries:
_FALLBACK_API_KEY_PROVIDERS — env-var-based (openrouter, zai, kimi, minimax)
_FALLBACK_OAUTH_PROVIDERS — OAuth-based (openai-codex, nous)
New _resolve_fallback_credentials() method handles all three cases
(OAuth, API key, custom endpoint) and returns a uniform (key, url, mode)
tuple. _try_activate_fallback() is now just validation + client build.
Adds Nous Portal as a fallback provider — uses the same OAuth flow
as the primary provider (hermes login), returns chat_completions mode.
OAuth providers get credential refresh for free: the existing 401
retry handlers (_try_refresh_codex/nous_client_credentials) check
self.provider, which is set correctly after fallback activation.
4 new tests (nous activation, nous no-login, codex retained).
27 total fallback tests passing, 2548 full suite.
2026-03-08 21:44:48 -07:00
|
|
|
|
# Build new client
|
feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
2026-03-08 20:22:33 -07:00
|
|
|
|
try:
|
|
|
|
|
|
client_kwargs = {"api_key": fb_key, "base_url": fb_base_url}
|
|
|
|
|
|
if "openrouter" in fb_base_url.lower():
|
|
|
|
|
|
client_kwargs["default_headers"] = {
|
|
|
|
|
|
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
|
|
|
|
|
"X-OpenRouter-Title": "Hermes Agent",
|
|
|
|
|
|
"X-OpenRouter-Categories": "productivity,cli-agent",
|
|
|
|
|
|
}
|
2026-03-08 20:49:55 -07:00
|
|
|
|
elif "api.kimi.com" in fb_base_url.lower():
|
|
|
|
|
|
client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
|
feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
2026-03-08 20:22:33 -07:00
|
|
|
|
|
|
|
|
|
|
self.client = OpenAI(**client_kwargs)
|
|
|
|
|
|
self._client_kwargs = client_kwargs
|
|
|
|
|
|
old_model = self.model
|
|
|
|
|
|
self.model = fb_model
|
|
|
|
|
|
self.provider = fb_provider
|
|
|
|
|
|
self.base_url = fb_base_url
|
2026-03-08 21:34:15 -07:00
|
|
|
|
self.api_mode = fb_api_mode
|
feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
2026-03-08 20:22:33 -07:00
|
|
|
|
self._fallback_activated = True
|
|
|
|
|
|
|
|
|
|
|
|
# Re-evaluate prompt caching for the new provider/model
|
|
|
|
|
|
self._use_prompt_caching = (
|
|
|
|
|
|
"openrouter" in fb_base_url.lower()
|
|
|
|
|
|
and "claude" in fb_model.lower()
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
print(
|
|
|
|
|
|
f"{self.log_prefix}🔄 Primary model failed — switching to fallback: "
|
|
|
|
|
|
f"{fb_model} via {fb_provider}"
|
|
|
|
|
|
)
|
|
|
|
|
|
logging.info(
|
|
|
|
|
|
"Fallback activated: %s → %s (%s)",
|
|
|
|
|
|
old_model, fb_model, fb_provider,
|
|
|
|
|
|
)
|
|
|
|
|
|
return True
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logging.error("Failed to activate fallback model: %s", e)
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
# ── End provider fallback ──────────────────────────────────────────────
|
|
|
|
|
|
|
2026-02-21 04:17:27 -08:00
|
|
|
|
def _build_api_kwargs(self, api_messages: list) -> dict:
|
2026-02-25 18:20:38 -08:00
|
|
|
|
"""Build the keyword arguments dict for the active API mode."""
|
|
|
|
|
|
if self.api_mode == "codex_responses":
|
|
|
|
|
|
instructions = ""
|
|
|
|
|
|
payload_messages = api_messages
|
|
|
|
|
|
if api_messages and api_messages[0].get("role") == "system":
|
|
|
|
|
|
instructions = str(api_messages[0].get("content") or "").strip()
|
|
|
|
|
|
payload_messages = api_messages[1:]
|
|
|
|
|
|
if not instructions:
|
|
|
|
|
|
instructions = DEFAULT_AGENT_IDENTITY
|
|
|
|
|
|
|
2026-03-07 10:14:19 -08:00
|
|
|
|
# Resolve reasoning effort: config > default (medium)
|
|
|
|
|
|
reasoning_effort = "medium"
|
2026-03-01 16:15:20 -08:00
|
|
|
|
reasoning_enabled = True
|
|
|
|
|
|
if self.reasoning_config and isinstance(self.reasoning_config, dict):
|
|
|
|
|
|
if self.reasoning_config.get("enabled") is False:
|
|
|
|
|
|
reasoning_enabled = False
|
|
|
|
|
|
elif self.reasoning_config.get("effort"):
|
|
|
|
|
|
reasoning_effort = self.reasoning_config["effort"]
|
|
|
|
|
|
|
2026-02-28 21:47:51 -08:00
|
|
|
|
kwargs = {
|
2026-02-25 18:20:38 -08:00
|
|
|
|
"model": self.model,
|
|
|
|
|
|
"instructions": instructions,
|
|
|
|
|
|
"input": self._chat_messages_to_responses_input(payload_messages),
|
|
|
|
|
|
"tools": self._responses_tools(),
|
|
|
|
|
|
"store": False,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-01 16:15:20 -08:00
|
|
|
|
if reasoning_enabled:
|
|
|
|
|
|
kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
|
|
|
|
|
|
kwargs["include"] = ["reasoning.encrypted_content"]
|
|
|
|
|
|
else:
|
|
|
|
|
|
kwargs["include"] = []
|
2026-02-28 21:47:51 -08:00
|
|
|
|
|
|
|
|
|
|
if self.max_tokens is not None:
|
|
|
|
|
|
kwargs["max_output_tokens"] = self.max_tokens
|
|
|
|
|
|
|
|
|
|
|
|
return kwargs
|
|
|
|
|
|
|
2026-02-21 04:17:27 -08:00
|
|
|
|
provider_preferences = {}
|
|
|
|
|
|
if self.providers_allowed:
|
|
|
|
|
|
provider_preferences["only"] = self.providers_allowed
|
|
|
|
|
|
if self.providers_ignored:
|
|
|
|
|
|
provider_preferences["ignore"] = self.providers_ignored
|
|
|
|
|
|
if self.providers_order:
|
|
|
|
|
|
provider_preferences["order"] = self.providers_order
|
|
|
|
|
|
if self.provider_sort:
|
|
|
|
|
|
provider_preferences["sort"] = self.provider_sort
|
2026-03-01 18:24:27 -08:00
|
|
|
|
if self.provider_require_parameters:
|
|
|
|
|
|
provider_preferences["require_parameters"] = True
|
|
|
|
|
|
if self.provider_data_collection:
|
|
|
|
|
|
provider_preferences["data_collection"] = self.provider_data_collection
|
2026-02-21 04:17:27 -08:00
|
|
|
|
|
|
|
|
|
|
api_kwargs = {
|
|
|
|
|
|
"model": self.model,
|
|
|
|
|
|
"messages": api_messages,
|
|
|
|
|
|
"tools": self.tools if self.tools else None,
|
2026-02-27 17:35:26 -08:00
|
|
|
|
"timeout": 900.0,
|
2026-02-21 04:17:27 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if self.max_tokens is not None:
|
2026-02-26 20:23:56 -08:00
|
|
|
|
api_kwargs.update(self._max_tokens_param(self.max_tokens))
|
2026-02-21 04:17:27 -08:00
|
|
|
|
|
|
|
|
|
|
extra_body = {}
|
|
|
|
|
|
|
|
|
|
|
|
if provider_preferences:
|
|
|
|
|
|
extra_body["provider"] = provider_preferences
|
|
|
|
|
|
|
2026-02-25 16:49:41 -08:00
|
|
|
|
_is_openrouter = "openrouter" in self.base_url.lower()
|
|
|
|
|
|
_is_nous = "nousresearch" in self.base_url.lower()
|
|
|
|
|
|
|
2026-03-02 11:35:03 +03:00
|
|
|
|
_is_mistral = "api.mistral.ai" in self.base_url.lower()
|
|
|
|
|
|
if (_is_openrouter or _is_nous) and not _is_mistral:
|
2026-02-21 04:17:27 -08:00
|
|
|
|
if self.reasoning_config is not None:
|
|
|
|
|
|
extra_body["reasoning"] = self.reasoning_config
|
|
|
|
|
|
else:
|
|
|
|
|
|
extra_body["reasoning"] = {
|
|
|
|
|
|
"enabled": True,
|
2026-03-07 10:14:19 -08:00
|
|
|
|
"effort": "medium"
|
2026-02-21 04:17:27 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-25 16:49:41 -08:00
|
|
|
|
# Nous Portal product attribution
|
|
|
|
|
|
if _is_nous:
|
|
|
|
|
|
extra_body["tags"] = ["product=hermes-agent"]
|
|
|
|
|
|
|
2026-02-21 04:17:27 -08:00
|
|
|
|
if extra_body:
|
|
|
|
|
|
api_kwargs["extra_body"] = extra_body
|
|
|
|
|
|
|
|
|
|
|
|
return api_kwargs
|
|
|
|
|
|
|
|
|
|
|
|
def _build_assistant_message(self, assistant_message, finish_reason: str) -> dict:
|
|
|
|
|
|
"""Build a normalized assistant message dict from an API response message.
|
|
|
|
|
|
|
|
|
|
|
|
Handles reasoning extraction, reasoning_details, and optional tool_calls
|
|
|
|
|
|
so both the tool-call path and the final-response path share one builder.
|
|
|
|
|
|
"""
|
|
|
|
|
|
reasoning_text = self._extract_reasoning(assistant_message)
|
|
|
|
|
|
|
|
|
|
|
|
if reasoning_text and self.verbose_logging:
|
|
|
|
|
|
preview = reasoning_text[:100] + "..." if len(reasoning_text) > 100 else reasoning_text
|
|
|
|
|
|
logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {preview}")
|
|
|
|
|
|
|
|
|
|
|
|
msg = {
|
|
|
|
|
|
"role": "assistant",
|
|
|
|
|
|
"content": assistant_message.content or "",
|
|
|
|
|
|
"reasoning": reasoning_text,
|
|
|
|
|
|
"finish_reason": finish_reason,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
|
2026-02-28 21:47:51 -08:00
|
|
|
|
# Pass reasoning_details back unmodified so providers (OpenRouter,
|
|
|
|
|
|
# Anthropic, OpenAI) can maintain reasoning continuity across turns.
|
|
|
|
|
|
# Each provider may include opaque fields (signature, encrypted_content)
|
|
|
|
|
|
# that must be preserved exactly.
|
|
|
|
|
|
raw_details = assistant_message.reasoning_details
|
|
|
|
|
|
preserved = []
|
|
|
|
|
|
for d in raw_details:
|
|
|
|
|
|
if isinstance(d, dict):
|
|
|
|
|
|
preserved.append(d)
|
|
|
|
|
|
elif hasattr(d, "__dict__"):
|
|
|
|
|
|
preserved.append(d.__dict__)
|
|
|
|
|
|
elif hasattr(d, "model_dump"):
|
|
|
|
|
|
preserved.append(d.model_dump())
|
|
|
|
|
|
if preserved:
|
|
|
|
|
|
msg["reasoning_details"] = preserved
|
|
|
|
|
|
|
|
|
|
|
|
# Codex Responses API: preserve encrypted reasoning items for
|
|
|
|
|
|
# multi-turn continuity. These get replayed as input on the next turn.
|
|
|
|
|
|
codex_items = getattr(assistant_message, "codex_reasoning_items", None)
|
|
|
|
|
|
if codex_items:
|
|
|
|
|
|
msg["codex_reasoning_items"] = codex_items
|
2026-02-21 04:17:27 -08:00
|
|
|
|
|
|
|
|
|
|
if assistant_message.tool_calls:
|
2026-02-25 18:11:26 -08:00
|
|
|
|
tool_calls = []
|
|
|
|
|
|
for tool_call in assistant_message.tool_calls:
|
|
|
|
|
|
raw_id = getattr(tool_call, "id", None)
|
|
|
|
|
|
call_id = getattr(tool_call, "call_id", None)
|
|
|
|
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
|
|
|
|
embedded_call_id, _ = self._split_responses_tool_id(raw_id)
|
|
|
|
|
|
call_id = embedded_call_id
|
|
|
|
|
|
if not isinstance(call_id, str) or not call_id.strip():
|
|
|
|
|
|
if isinstance(raw_id, str) and raw_id.strip():
|
|
|
|
|
|
call_id = raw_id.strip()
|
|
|
|
|
|
else:
|
|
|
|
|
|
call_id = f"call_{uuid.uuid4().hex[:12]}"
|
|
|
|
|
|
call_id = call_id.strip()
|
|
|
|
|
|
|
|
|
|
|
|
response_item_id = getattr(tool_call, "response_item_id", None)
|
|
|
|
|
|
if not isinstance(response_item_id, str) or not response_item_id.strip():
|
|
|
|
|
|
_, embedded_response_item_id = self._split_responses_tool_id(raw_id)
|
|
|
|
|
|
response_item_id = embedded_response_item_id
|
|
|
|
|
|
|
|
|
|
|
|
response_item_id = self._derive_responses_function_call_id(
|
|
|
|
|
|
call_id,
|
|
|
|
|
|
response_item_id if isinstance(response_item_id, str) else None,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
2026-02-28 18:01:13 +03:00
|
|
|
|
tc_dict = {
|
2026-02-25 18:11:26 -08:00
|
|
|
|
"id": call_id,
|
|
|
|
|
|
"call_id": call_id,
|
|
|
|
|
|
"response_item_id": response_item_id,
|
2026-02-21 04:17:27 -08:00
|
|
|
|
"type": tool_call.type,
|
|
|
|
|
|
"function": {
|
|
|
|
|
|
"name": tool_call.function.name,
|
|
|
|
|
|
"arguments": tool_call.function.arguments
|
2026-02-25 18:11:26 -08:00
|
|
|
|
},
|
2026-02-21 04:17:27 -08:00
|
|
|
|
}
|
2026-02-28 18:01:13 +03:00
|
|
|
|
# Preserve extra_content (e.g. Gemini thought_signature) so it
|
|
|
|
|
|
# is sent back on subsequent API calls. Without this, Gemini 3
|
|
|
|
|
|
# thinking models reject the request with a 400 error.
|
|
|
|
|
|
extra = getattr(tool_call, "extra_content", None)
|
|
|
|
|
|
if extra is not None:
|
|
|
|
|
|
if hasattr(extra, "model_dump"):
|
|
|
|
|
|
extra = extra.model_dump()
|
|
|
|
|
|
tc_dict["extra_content"] = extra
|
2026-03-01 11:59:12 +03:00
|
|
|
|
tool_calls.append(tc_dict)
|
2026-02-25 18:11:26 -08:00
|
|
|
|
msg["tool_calls"] = tool_calls
|
2026-02-21 04:17:27 -08:00
|
|
|
|
|
|
|
|
|
|
return msg
|
|
|
|
|
|
|
2026-02-22 10:15:17 -08:00
|
|
|
|
def flush_memories(self, messages: list = None, min_turns: int = None):
|
|
|
|
|
|
"""Give the model one turn to persist memories before context is lost.
|
|
|
|
|
|
|
|
|
|
|
|
Called before compression, session reset, or CLI exit. Injects a flush
|
|
|
|
|
|
message, makes one API call, executes any memory tool calls, then
|
|
|
|
|
|
strips all flush artifacts from the message list.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
messages: The current conversation messages. If None, uses
|
|
|
|
|
|
self._session_messages (last run_conversation state).
|
|
|
|
|
|
min_turns: Minimum user turns required to trigger the flush.
|
|
|
|
|
|
None = use config value (flush_min_turns).
|
|
|
|
|
|
0 = always flush (used for compression).
|
|
|
|
|
|
"""
|
|
|
|
|
|
if self._memory_flush_min_turns == 0 and min_turns is None:
|
|
|
|
|
|
return
|
|
|
|
|
|
if "memory" not in self.valid_tool_names or not self._memory_store:
|
|
|
|
|
|
return
|
|
|
|
|
|
effective_min = min_turns if min_turns is not None else self._memory_flush_min_turns
|
|
|
|
|
|
if self._user_turn_count < effective_min:
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
if messages is None:
|
|
|
|
|
|
messages = getattr(self, '_session_messages', None)
|
|
|
|
|
|
if not messages or len(messages) < 3:
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
flush_content = (
|
|
|
|
|
|
"[System: The session is being compressed. "
|
|
|
|
|
|
"Please save anything worth remembering to your memories.]"
|
|
|
|
|
|
)
|
2026-02-27 03:21:49 -08:00
|
|
|
|
_sentinel = f"__flush_{id(self)}_{time.monotonic()}"
|
|
|
|
|
|
flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel}
|
2026-02-22 10:15:17 -08:00
|
|
|
|
messages.append(flush_msg)
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
# Build API messages for the flush call
|
|
|
|
|
|
api_messages = []
|
|
|
|
|
|
for msg in messages:
|
|
|
|
|
|
api_msg = msg.copy()
|
|
|
|
|
|
if msg.get("role") == "assistant":
|
|
|
|
|
|
reasoning = msg.get("reasoning")
|
|
|
|
|
|
if reasoning:
|
|
|
|
|
|
api_msg["reasoning_content"] = reasoning
|
|
|
|
|
|
api_msg.pop("reasoning", None)
|
2026-03-02 11:35:03 +03:00
|
|
|
|
api_msg.pop("finish_reason", None)
|
2026-03-01 02:51:31 +03:00
|
|
|
|
api_msg.pop("_flush_sentinel", None)
|
2026-02-22 10:15:17 -08:00
|
|
|
|
api_messages.append(api_msg)
|
|
|
|
|
|
|
|
|
|
|
|
if self._cached_system_prompt:
|
|
|
|
|
|
api_messages = [{"role": "system", "content": self._cached_system_prompt}] + api_messages
|
|
|
|
|
|
|
|
|
|
|
|
# Make one API call with only the memory tool available
|
|
|
|
|
|
memory_tool_def = None
|
|
|
|
|
|
for t in (self.tools or []):
|
|
|
|
|
|
if t.get("function", {}).get("name") == "memory":
|
|
|
|
|
|
memory_tool_def = t
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
if not memory_tool_def:
|
|
|
|
|
|
messages.pop() # remove flush msg
|
|
|
|
|
|
return
|
|
|
|
|
|
|
2026-02-28 21:47:51 -08:00
|
|
|
|
# Use auxiliary client for the flush call when available --
|
|
|
|
|
|
# it's cheaper and avoids Codex Responses API incompatibility.
|
|
|
|
|
|
from agent.auxiliary_client import get_text_auxiliary_client
|
|
|
|
|
|
aux_client, aux_model = get_text_auxiliary_client()
|
2026-02-22 10:15:17 -08:00
|
|
|
|
|
2026-02-28 21:47:51 -08:00
|
|
|
|
if aux_client:
|
|
|
|
|
|
api_kwargs = {
|
|
|
|
|
|
"model": aux_model,
|
|
|
|
|
|
"messages": api_messages,
|
|
|
|
|
|
"tools": [memory_tool_def],
|
|
|
|
|
|
"temperature": 0.3,
|
|
|
|
|
|
"max_tokens": 5120,
|
|
|
|
|
|
}
|
|
|
|
|
|
response = aux_client.chat.completions.create(**api_kwargs, timeout=30.0)
|
|
|
|
|
|
elif self.api_mode == "codex_responses":
|
|
|
|
|
|
# No auxiliary client -- use the Codex Responses path directly
|
|
|
|
|
|
codex_kwargs = self._build_api_kwargs(api_messages)
|
|
|
|
|
|
codex_kwargs["tools"] = self._responses_tools([memory_tool_def])
|
|
|
|
|
|
codex_kwargs["temperature"] = 0.3
|
|
|
|
|
|
if "max_output_tokens" in codex_kwargs:
|
|
|
|
|
|
codex_kwargs["max_output_tokens"] = 5120
|
|
|
|
|
|
response = self._run_codex_stream(codex_kwargs)
|
|
|
|
|
|
else:
|
|
|
|
|
|
api_kwargs = {
|
|
|
|
|
|
"model": self.model,
|
|
|
|
|
|
"messages": api_messages,
|
|
|
|
|
|
"tools": [memory_tool_def],
|
|
|
|
|
|
"temperature": 0.3,
|
|
|
|
|
|
**self._max_tokens_param(5120),
|
|
|
|
|
|
}
|
|
|
|
|
|
response = self.client.chat.completions.create(**api_kwargs, timeout=30.0)
|
2026-02-22 10:15:17 -08:00
|
|
|
|
|
2026-02-28 21:47:51 -08:00
|
|
|
|
# Extract tool calls from the response, handling both API formats
|
|
|
|
|
|
tool_calls = []
|
|
|
|
|
|
if self.api_mode == "codex_responses" and not aux_client:
|
|
|
|
|
|
assistant_msg, _ = self._normalize_codex_response(response)
|
|
|
|
|
|
if assistant_msg and assistant_msg.tool_calls:
|
|
|
|
|
|
tool_calls = assistant_msg.tool_calls
|
|
|
|
|
|
elif hasattr(response, "choices") and response.choices:
|
2026-02-22 10:15:17 -08:00
|
|
|
|
assistant_message = response.choices[0].message
|
|
|
|
|
|
if assistant_message.tool_calls:
|
2026-02-28 21:47:51 -08:00
|
|
|
|
tool_calls = assistant_message.tool_calls
|
|
|
|
|
|
|
|
|
|
|
|
for tc in tool_calls:
|
|
|
|
|
|
if tc.function.name == "memory":
|
|
|
|
|
|
try:
|
|
|
|
|
|
args = json.loads(tc.function.arguments)
|
|
|
|
|
|
flush_target = args.get("target", "memory")
|
|
|
|
|
|
from tools.memory_tool import memory_tool as _memory_tool
|
|
|
|
|
|
result = _memory_tool(
|
|
|
|
|
|
action=args.get("action"),
|
|
|
|
|
|
target=flush_target,
|
|
|
|
|
|
content=args.get("content"),
|
|
|
|
|
|
old_text=args.get("old_text"),
|
|
|
|
|
|
store=self._memory_store,
|
|
|
|
|
|
)
|
|
|
|
|
|
if self._honcho and flush_target == "user" and args.get("action") == "add":
|
|
|
|
|
|
self._honcho_save_user_observation(args.get("content", ""))
|
|
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.debug("Memory flush tool call failed: %s", e)
|
2026-02-22 10:15:17 -08:00
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.debug("Memory flush API call failed: %s", e)
|
|
|
|
|
|
finally:
|
2026-02-27 03:21:49 -08:00
|
|
|
|
# Strip flush artifacts: remove everything from the flush message onward.
|
|
|
|
|
|
# Use sentinel marker instead of identity check for robustness.
|
|
|
|
|
|
while messages and messages[-1].get("_flush_sentinel") != _sentinel:
|
2026-02-22 10:15:17 -08:00
|
|
|
|
messages.pop()
|
2026-02-27 03:21:49 -08:00
|
|
|
|
if not messages:
|
|
|
|
|
|
break
|
|
|
|
|
|
if messages and messages[-1].get("_flush_sentinel") == _sentinel:
|
2026-02-22 10:15:17 -08:00
|
|
|
|
messages.pop()
|
|
|
|
|
|
|
2026-02-21 04:17:27 -08:00
|
|
|
|
def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None) -> tuple:
|
|
|
|
|
|
"""Compress conversation context and split the session in SQLite.
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
(compressed_messages, new_system_prompt) tuple
|
|
|
|
|
|
"""
|
2026-02-22 10:15:17 -08:00
|
|
|
|
# Pre-compression memory flush: let the model save memories before they're lost
|
|
|
|
|
|
self.flush_memories(messages, min_turns=0)
|
|
|
|
|
|
|
2026-02-21 04:17:27 -08:00
|
|
|
|
compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)
|
|
|
|
|
|
|
|
|
|
|
|
todo_snapshot = self._todo_store.format_for_injection()
|
|
|
|
|
|
if todo_snapshot:
|
|
|
|
|
|
compressed.append({"role": "user", "content": todo_snapshot})
|
|
|
|
|
|
|
|
|
|
|
|
self._invalidate_system_prompt()
|
|
|
|
|
|
new_system_prompt = self._build_system_prompt(system_message)
|
|
|
|
|
|
self._cached_system_prompt = new_system_prompt
|
|
|
|
|
|
|
|
|
|
|
|
if self._session_db:
|
|
|
|
|
|
try:
|
2026-03-08 15:20:29 -07:00
|
|
|
|
# Propagate title to the new session with auto-numbering
|
|
|
|
|
|
old_title = self._session_db.get_session_title(self.session_id)
|
2026-02-21 04:17:27 -08:00
|
|
|
|
self._session_db.end_session(self.session_id, "compression")
|
|
|
|
|
|
old_session_id = self.session_id
|
|
|
|
|
|
self.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
|
|
|
|
|
|
self._session_db.create_session(
|
|
|
|
|
|
session_id=self.session_id,
|
|
|
|
|
|
source=self.platform or "cli",
|
|
|
|
|
|
model=self.model,
|
|
|
|
|
|
parent_session_id=old_session_id,
|
|
|
|
|
|
)
|
2026-03-08 15:20:29 -07:00
|
|
|
|
# Auto-number the title for the continuation session
|
|
|
|
|
|
if old_title:
|
|
|
|
|
|
try:
|
|
|
|
|
|
new_title = self._session_db.get_next_title_in_lineage(old_title)
|
|
|
|
|
|
self._session_db.set_session_title(self.session_id, new_title)
|
|
|
|
|
|
except (ValueError, Exception) as e:
|
|
|
|
|
|
logger.debug("Could not propagate title on compression: %s", e)
|
2026-02-21 04:17:27 -08:00
|
|
|
|
self._session_db.update_system_prompt(self.session_id, new_system_prompt)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.debug("Session DB compression split failed: %s", e)
|
|
|
|
|
|
|
|
|
|
|
|
return compressed, new_system_prompt
|
|
|
|
|
|
|
|
|
|
|
|
def _execute_tool_calls(self, assistant_message, messages: list, effective_task_id: str) -> None:
|
|
|
|
|
|
"""Execute tool calls from the assistant message and append results to messages."""
|
|
|
|
|
|
for i, tool_call in enumerate(assistant_message.tool_calls, 1):
|
2026-02-23 02:11:33 -08:00
|
|
|
|
# SAFETY: check interrupt BEFORE starting each tool.
|
|
|
|
|
|
# If the user sent "stop" during a previous tool's execution,
|
|
|
|
|
|
# do NOT start any more tools -- skip them all immediately.
|
|
|
|
|
|
if self._interrupt_requested:
|
|
|
|
|
|
remaining_calls = assistant_message.tool_calls[i-1:]
|
|
|
|
|
|
if remaining_calls:
|
|
|
|
|
|
print(f"{self.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)")
|
|
|
|
|
|
for skipped_tc in remaining_calls:
|
2026-03-08 18:58:23 -07:00
|
|
|
|
skipped_name = skipped_tc.function.name
|
2026-02-23 02:11:33 -08:00
|
|
|
|
skip_msg = {
|
|
|
|
|
|
"role": "tool",
|
2026-03-08 18:58:23 -07:00
|
|
|
|
"content": f"[Tool execution cancelled — {skipped_name} was skipped due to user interrupt]",
|
2026-02-23 02:11:33 -08:00
|
|
|
|
"tool_call_id": skipped_tc.id,
|
|
|
|
|
|
}
|
|
|
|
|
|
messages.append(skip_msg)
|
|
|
|
|
|
self._log_msg_to_db(skip_msg)
|
|
|
|
|
|
break
|
|
|
|
|
|
|
2026-02-21 04:17:27 -08:00
|
|
|
|
function_name = tool_call.function.name
|
|
|
|
|
|
|
2026-02-22 21:33:28 -08:00
|
|
|
|
# Reset nudge counters when the relevant tool is actually used
|
|
|
|
|
|
if function_name == "memory":
|
|
|
|
|
|
self._turns_since_memory = 0
|
|
|
|
|
|
elif function_name == "skill_manage":
|
|
|
|
|
|
self._iters_since_skill = 0
|
|
|
|
|
|
|
2026-02-21 04:17:27 -08:00
|
|
|
|
try:
|
|
|
|
|
|
function_args = json.loads(tool_call.function.arguments)
|
|
|
|
|
|
except json.JSONDecodeError as e:
|
|
|
|
|
|
logging.warning(f"Unexpected JSON error after validation: {e}")
|
|
|
|
|
|
function_args = {}
|
fix: add upstream guard for non-dict function_args + tests for build_tool_preview
Complements PR #453 by 0xbyt4. Adds isinstance(dict) guard in
run_agent.py to catch cases where json.loads returns non-dict
(e.g. null, list, string) before they reach downstream code.
Also adds 15 tests for build_tool_preview covering None args,
empty dicts, known/unknown tools, fallback keys, truncation,
and all special-cased tools (process, todo, memory, session_search).
2026-03-09 21:01:40 -07:00
|
|
|
|
if not isinstance(function_args, dict):
|
|
|
|
|
|
function_args = {}
|
2026-02-21 04:17:27 -08:00
|
|
|
|
|
|
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
args_str = json.dumps(function_args, ensure_ascii=False)
|
|
|
|
|
|
args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
|
|
|
|
|
|
print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
|
|
|
|
|
|
|
|
|
|
|
|
if self.tool_progress_callback:
|
|
|
|
|
|
try:
|
|
|
|
|
|
preview = _build_tool_preview(function_name, function_args)
|
2026-03-02 05:23:15 -08:00
|
|
|
|
self.tool_progress_callback(function_name, preview, function_args)
|
2026-02-21 04:17:27 -08:00
|
|
|
|
except Exception as cb_err:
|
|
|
|
|
|
logging.debug(f"Tool progress callback error: {cb_err}")
|
|
|
|
|
|
|
feat: filesystem checkpoints and /rollback command
Automatic filesystem snapshots before destructive file operations,
with user-facing rollback. Inspired by PR #559 (by @alireza78a).
Architecture:
- Shadow git repos at ~/.hermes/checkpoints/{hash}/ via GIT_DIR
- CheckpointManager: take/list/restore, turn-scoped dedup, pruning
- Transparent — the LLM never sees it, no tool schema, no tokens
- Once per turn — only first write_file/patch triggers a snapshot
Integration:
- Config: checkpoints.enabled + checkpoints.max_snapshots
- CLI flag: hermes --checkpoints
- Trigger: run_agent.py _execute_tool_calls() before write_file/patch
- /rollback slash command in CLI + gateway (list, restore by number)
- Pre-rollback snapshot auto-created on restore (undo the undo)
Safety:
- Never blocks file operations — all errors silently logged
- Skips root dir, home dir, dirs >50K files
- Disables gracefully when git not installed
- Shadow repo completely isolated from project git
Tests: 35 new tests, all passing (2798 total suite)
Docs: feature page, config reference, CLI commands reference
2026-03-10 00:49:15 -07:00
|
|
|
|
# Checkpoint: snapshot working dir before file-mutating tools
|
|
|
|
|
|
if function_name in ("write_file", "patch") and self._checkpoint_mgr.enabled:
|
|
|
|
|
|
try:
|
|
|
|
|
|
file_path = function_args.get("path", "")
|
|
|
|
|
|
if file_path:
|
|
|
|
|
|
work_dir = self._checkpoint_mgr.get_working_dir_for_path(file_path)
|
|
|
|
|
|
self._checkpoint_mgr.ensure_checkpoint(
|
|
|
|
|
|
work_dir, f"before {function_name}"
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass # never block tool execution
|
|
|
|
|
|
|
2026-02-21 04:17:27 -08:00
|
|
|
|
tool_start_time = time.time()
|
|
|
|
|
|
|
|
|
|
|
|
if function_name == "todo":
|
|
|
|
|
|
from tools.todo_tool import todo_tool as _todo_tool
|
|
|
|
|
|
function_result = _todo_tool(
|
|
|
|
|
|
todos=function_args.get("todos"),
|
|
|
|
|
|
merge=function_args.get("merge", False),
|
|
|
|
|
|
store=self._todo_store,
|
|
|
|
|
|
)
|
|
|
|
|
|
tool_duration = time.time() - tool_start_time
|
|
|
|
|
|
if self.quiet_mode:
|
2026-02-22 02:16:11 -08:00
|
|
|
|
print(f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
|
2026-02-27 00:32:17 -05:00
|
|
|
|
elif function_name == "session_search":
|
|
|
|
|
|
if not self._session_db:
|
|
|
|
|
|
function_result = json.dumps({"success": False, "error": "Session database not available."})
|
|
|
|
|
|
else:
|
|
|
|
|
|
from tools.session_search_tool import session_search as _session_search
|
|
|
|
|
|
function_result = _session_search(
|
|
|
|
|
|
query=function_args.get("query", ""),
|
|
|
|
|
|
role_filter=function_args.get("role_filter"),
|
|
|
|
|
|
limit=function_args.get("limit", 3),
|
|
|
|
|
|
db=self._session_db,
|
2026-03-04 06:06:40 -08:00
|
|
|
|
current_session_id=self.session_id,
|
2026-02-27 00:32:17 -05:00
|
|
|
|
)
|
2026-02-21 04:17:27 -08:00
|
|
|
|
tool_duration = time.time() - tool_start_time
|
|
|
|
|
|
if self.quiet_mode:
|
2026-02-22 02:16:11 -08:00
|
|
|
|
print(f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
|
2026-02-21 04:17:27 -08:00
|
|
|
|
elif function_name == "memory":
|
2026-02-26 18:02:24 -05:00
|
|
|
|
target = function_args.get("target", "memory")
|
2026-02-21 04:17:27 -08:00
|
|
|
|
from tools.memory_tool import memory_tool as _memory_tool
|
|
|
|
|
|
function_result = _memory_tool(
|
|
|
|
|
|
action=function_args.get("action"),
|
2026-02-26 18:07:33 -05:00
|
|
|
|
target=target,
|
2026-02-21 04:17:27 -08:00
|
|
|
|
content=function_args.get("content"),
|
|
|
|
|
|
old_text=function_args.get("old_text"),
|
|
|
|
|
|
store=self._memory_store,
|
|
|
|
|
|
)
|
2026-02-26 18:07:33 -05:00
|
|
|
|
# Also send user observations to Honcho when active
|
2026-02-26 18:02:24 -05:00
|
|
|
|
if self._honcho and target == "user" and function_args.get("action") == "add":
|
2026-02-26 18:07:33 -05:00
|
|
|
|
self._honcho_save_user_observation(function_args.get("content", ""))
|
2026-02-21 04:17:27 -08:00
|
|
|
|
tool_duration = time.time() - tool_start_time
|
|
|
|
|
|
if self.quiet_mode:
|
2026-02-22 02:16:11 -08:00
|
|
|
|
print(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
|
2026-02-21 04:17:27 -08:00
|
|
|
|
elif function_name == "clarify":
|
|
|
|
|
|
from tools.clarify_tool import clarify_tool as _clarify_tool
|
|
|
|
|
|
function_result = _clarify_tool(
|
|
|
|
|
|
question=function_args.get("question", ""),
|
|
|
|
|
|
choices=function_args.get("choices"),
|
|
|
|
|
|
callback=self.clarify_callback,
|
|
|
|
|
|
)
|
|
|
|
|
|
tool_duration = time.time() - tool_start_time
|
|
|
|
|
|
if self.quiet_mode:
|
2026-02-22 02:16:11 -08:00
|
|
|
|
print(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
|
2026-02-21 04:17:27 -08:00
|
|
|
|
elif function_name == "delegate_task":
|
|
|
|
|
|
from tools.delegate_tool import delegate_task as _delegate_task
|
|
|
|
|
|
tasks_arg = function_args.get("tasks")
|
|
|
|
|
|
if tasks_arg and isinstance(tasks_arg, list):
|
|
|
|
|
|
spinner_label = f"🔀 delegating {len(tasks_arg)} tasks"
|
|
|
|
|
|
else:
|
|
|
|
|
|
goal_preview = (function_args.get("goal") or "")[:30]
|
|
|
|
|
|
spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating"
|
|
|
|
|
|
spinner = None
|
|
|
|
|
|
if self.quiet_mode:
|
|
|
|
|
|
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
|
|
|
|
|
|
spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots')
|
|
|
|
|
|
spinner.start()
|
|
|
|
|
|
self._delegate_spinner = spinner
|
2026-02-22 02:16:11 -08:00
|
|
|
|
_delegate_result = None
|
2026-02-21 04:17:27 -08:00
|
|
|
|
try:
|
|
|
|
|
|
function_result = _delegate_task(
|
|
|
|
|
|
goal=function_args.get("goal"),
|
|
|
|
|
|
context=function_args.get("context"),
|
|
|
|
|
|
toolsets=function_args.get("toolsets"),
|
|
|
|
|
|
tasks=tasks_arg,
|
|
|
|
|
|
max_iterations=function_args.get("max_iterations"),
|
|
|
|
|
|
parent_agent=self,
|
|
|
|
|
|
)
|
2026-02-22 02:16:11 -08:00
|
|
|
|
_delegate_result = function_result
|
2026-02-21 04:17:27 -08:00
|
|
|
|
finally:
|
|
|
|
|
|
self._delegate_spinner = None
|
|
|
|
|
|
tool_duration = time.time() - tool_start_time
|
2026-02-22 02:16:11 -08:00
|
|
|
|
cute_msg = _get_cute_tool_message_impl('delegate_task', function_args, tool_duration, result=_delegate_result)
|
2026-02-21 04:17:27 -08:00
|
|
|
|
if spinner:
|
|
|
|
|
|
spinner.stop(cute_msg)
|
|
|
|
|
|
elif self.quiet_mode:
|
|
|
|
|
|
print(f" {cute_msg}")
|
|
|
|
|
|
elif self.quiet_mode:
|
|
|
|
|
|
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
|
|
|
|
|
|
tool_emoji_map = {
|
|
|
|
|
|
'web_search': '🔍', 'web_extract': '📄', 'web_crawl': '🕸️',
|
|
|
|
|
|
'terminal': '💻', 'process': '⚙️',
|
|
|
|
|
|
'read_file': '📖', 'write_file': '✍️', 'patch': '🔧', 'search_files': '🔎',
|
|
|
|
|
|
'browser_navigate': '🌐', 'browser_snapshot': '📸',
|
|
|
|
|
|
'browser_click': '👆', 'browser_type': '⌨️',
|
|
|
|
|
|
'browser_scroll': '📜', 'browser_back': '◀️',
|
|
|
|
|
|
'browser_press': '⌨️', 'browser_close': '🚪',
|
|
|
|
|
|
'browser_get_images': '🖼️', 'browser_vision': '👁️',
|
|
|
|
|
|
'image_generate': '🎨', 'text_to_speech': '🔊',
|
|
|
|
|
|
'vision_analyze': '👁️', 'mixture_of_agents': '🧠',
|
|
|
|
|
|
'skills_list': '📚', 'skill_view': '📚',
|
|
|
|
|
|
'schedule_cronjob': '⏰', 'list_cronjobs': '⏰', 'remove_cronjob': '⏰',
|
|
|
|
|
|
'send_message': '📨', 'todo': '📋', 'memory': '🧠', 'session_search': '🔍',
|
|
|
|
|
|
'clarify': '❓', 'execute_code': '🐍', 'delegate_task': '🔀',
|
|
|
|
|
|
}
|
|
|
|
|
|
emoji = tool_emoji_map.get(function_name, '⚡')
|
|
|
|
|
|
preview = _build_tool_preview(function_name, function_args) or function_name
|
|
|
|
|
|
if len(preview) > 30:
|
|
|
|
|
|
preview = preview[:27] + "..."
|
|
|
|
|
|
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots')
|
|
|
|
|
|
spinner.start()
|
2026-02-22 02:16:11 -08:00
|
|
|
|
_spinner_result = None
|
2026-02-21 04:17:27 -08:00
|
|
|
|
try:
|
|
|
|
|
|
function_result = handle_function_call(function_name, function_args, effective_task_id)
|
2026-02-22 02:16:11 -08:00
|
|
|
|
_spinner_result = function_result
|
2026-02-27 03:21:49 -08:00
|
|
|
|
except Exception as tool_error:
|
|
|
|
|
|
function_result = f"Error executing tool '{function_name}': {tool_error}"
|
2026-02-28 22:49:58 -08:00
|
|
|
|
logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
2026-02-21 04:17:27 -08:00
|
|
|
|
finally:
|
|
|
|
|
|
tool_duration = time.time() - tool_start_time
|
2026-02-22 02:16:11 -08:00
|
|
|
|
cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result)
|
2026-02-21 04:17:27 -08:00
|
|
|
|
spinner.stop(cute_msg)
|
|
|
|
|
|
else:
|
2026-02-27 03:21:49 -08:00
|
|
|
|
try:
|
|
|
|
|
|
function_result = handle_function_call(function_name, function_args, effective_task_id)
|
|
|
|
|
|
except Exception as tool_error:
|
|
|
|
|
|
function_result = f"Error executing tool '{function_name}': {tool_error}"
|
2026-02-28 22:49:58 -08:00
|
|
|
|
logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
|
2026-02-21 04:17:27 -08:00
|
|
|
|
tool_duration = time.time() - tool_start_time
|
|
|
|
|
|
|
|
|
|
|
|
result_preview = function_result[:200] if len(function_result) > 200 else function_result
|
|
|
|
|
|
|
2026-02-28 22:49:58 -08:00
|
|
|
|
# Log tool errors to the persistent error log so [error] tags
|
|
|
|
|
|
# in the UI always have a corresponding detailed entry on disk.
|
|
|
|
|
|
_is_error_result, _ = _detect_tool_failure(function_name, function_result)
|
|
|
|
|
|
if _is_error_result:
|
|
|
|
|
|
logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
|
|
|
|
|
|
|
2026-02-21 04:17:27 -08:00
|
|
|
|
if self.verbose_logging:
|
|
|
|
|
|
logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
|
|
|
|
|
|
logging.debug(f"Tool result preview: {result_preview}...")
|
|
|
|
|
|
|
2026-02-23 23:55:42 -08:00
|
|
|
|
# Guard against tools returning absurdly large content that would
|
|
|
|
|
|
# blow up the context window. 100K chars ≈ 25K tokens — generous
|
|
|
|
|
|
# enough for any reasonable tool output but prevents catastrophic
|
|
|
|
|
|
# context explosions (e.g. accidental base64 image dumps).
|
|
|
|
|
|
MAX_TOOL_RESULT_CHARS = 100_000
|
|
|
|
|
|
if len(function_result) > MAX_TOOL_RESULT_CHARS:
|
|
|
|
|
|
original_len = len(function_result)
|
|
|
|
|
|
function_result = (
|
|
|
|
|
|
function_result[:MAX_TOOL_RESULT_CHARS]
|
|
|
|
|
|
+ f"\n\n[Truncated: tool response was {original_len:,} chars, "
|
|
|
|
|
|
f"exceeding the {MAX_TOOL_RESULT_CHARS:,} char limit]"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
2026-02-22 17:10:24 -08:00
|
|
|
|
tool_msg = {
|
2026-02-21 04:17:27 -08:00
|
|
|
|
"role": "tool",
|
|
|
|
|
|
"content": function_result,
|
|
|
|
|
|
"tool_call_id": tool_call.id
|
2026-02-22 17:10:24 -08:00
|
|
|
|
}
|
|
|
|
|
|
messages.append(tool_msg)
|
|
|
|
|
|
self._log_msg_to_db(tool_msg)
|
2026-02-21 04:17:27 -08:00
|
|
|
|
|
|
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
|
|
|
|
|
|
print(f" ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}")
|
|
|
|
|
|
|
|
|
|
|
|
if self._interrupt_requested and i < len(assistant_message.tool_calls):
|
|
|
|
|
|
remaining = len(assistant_message.tool_calls) - i
|
|
|
|
|
|
print(f"{self.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)")
|
|
|
|
|
|
for skipped_tc in assistant_message.tool_calls[i:]:
|
2026-03-08 18:58:23 -07:00
|
|
|
|
skipped_name = skipped_tc.function.name
|
2026-02-22 17:10:24 -08:00
|
|
|
|
skip_msg = {
|
2026-02-21 04:17:27 -08:00
|
|
|
|
"role": "tool",
|
2026-03-08 18:58:23 -07:00
|
|
|
|
"content": f"[Tool execution skipped — {skipped_name} was not started. User sent a new message]",
|
2026-02-21 04:17:27 -08:00
|
|
|
|
"tool_call_id": skipped_tc.id
|
2026-02-22 17:10:24 -08:00
|
|
|
|
}
|
|
|
|
|
|
messages.append(skip_msg)
|
|
|
|
|
|
self._log_msg_to_db(skip_msg)
|
2026-02-21 04:17:27 -08:00
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
|
|
|
|
|
|
time.sleep(self.tool_delay)
|
|
|
|
|
|
|
|
|
|
|
|
def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
|
|
|
|
|
|
"""Request a summary when max iterations are reached. Returns the final response text."""
|
|
|
|
|
|
print(f"⚠️ Reached maximum iterations ({self.max_iterations}). Requesting summary...")
|
|
|
|
|
|
|
|
|
|
|
|
summary_request = (
|
|
|
|
|
|
"You've reached the maximum number of tool-calling iterations allowed. "
|
|
|
|
|
|
"Please provide a final response summarizing what you've found and accomplished so far, "
|
|
|
|
|
|
"without calling any more tools."
|
|
|
|
|
|
)
|
|
|
|
|
|
messages.append({"role": "user", "content": summary_request})
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
2026-03-04 21:08:20 -08:00
|
|
|
|
# Build API messages, stripping internal-only fields
|
|
|
|
|
|
# (finish_reason, reasoning) that strict APIs like Mistral reject with 422
|
|
|
|
|
|
api_messages = []
|
|
|
|
|
|
for msg in messages:
|
|
|
|
|
|
api_msg = msg.copy()
|
|
|
|
|
|
for internal_field in ("reasoning", "finish_reason"):
|
|
|
|
|
|
api_msg.pop(internal_field, None)
|
|
|
|
|
|
api_messages.append(api_msg)
|
|
|
|
|
|
|
2026-02-23 23:55:42 -08:00
|
|
|
|
effective_system = self._cached_system_prompt or ""
|
2026-02-21 04:17:27 -08:00
|
|
|
|
if self.ephemeral_system_prompt:
|
2026-02-23 23:55:42 -08:00
|
|
|
|
effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
|
|
|
|
|
|
if effective_system:
|
|
|
|
|
|
api_messages = [{"role": "system", "content": effective_system}] + api_messages
|
|
|
|
|
|
if self.prefill_messages:
|
|
|
|
|
|
sys_offset = 1 if effective_system else 0
|
|
|
|
|
|
for idx, pfm in enumerate(self.prefill_messages):
|
|
|
|
|
|
api_messages.insert(sys_offset + idx, pfm.copy())
|
2026-02-21 04:17:27 -08:00
|
|
|
|
|
|
|
|
|
|
summary_extra_body = {}
|
2026-02-25 16:49:41 -08:00
|
|
|
|
_is_openrouter = "openrouter" in self.base_url.lower()
|
|
|
|
|
|
_is_nous = "nousresearch" in self.base_url.lower()
|
|
|
|
|
|
if _is_openrouter or _is_nous:
|
2026-02-21 04:17:27 -08:00
|
|
|
|
if self.reasoning_config is not None:
|
|
|
|
|
|
summary_extra_body["reasoning"] = self.reasoning_config
|
|
|
|
|
|
else:
|
|
|
|
|
|
summary_extra_body["reasoning"] = {
|
|
|
|
|
|
"enabled": True,
|
2026-03-07 10:14:19 -08:00
|
|
|
|
"effort": "medium"
|
2026-02-21 04:17:27 -08:00
|
|
|
|
}
|
2026-02-25 16:49:41 -08:00
|
|
|
|
if _is_nous:
|
|
|
|
|
|
summary_extra_body["tags"] = ["product=hermes-agent"]
|
2026-02-21 04:17:27 -08:00
|
|
|
|
|
2026-02-28 21:47:51 -08:00
|
|
|
|
if self.api_mode == "codex_responses":
|
|
|
|
|
|
codex_kwargs = self._build_api_kwargs(api_messages)
|
2026-03-03 03:42:44 -08:00
|
|
|
|
codex_kwargs.pop("tools", None)
|
2026-02-28 21:47:51 -08:00
|
|
|
|
summary_response = self._run_codex_stream(codex_kwargs)
|
|
|
|
|
|
assistant_message, _ = self._normalize_codex_response(summary_response)
|
|
|
|
|
|
final_response = (assistant_message.content or "").strip() if assistant_message else ""
|
2026-02-21 04:17:27 -08:00
|
|
|
|
else:
|
2026-02-25 18:20:38 -08:00
|
|
|
|
summary_kwargs = {
|
|
|
|
|
|
"model": self.model,
|
|
|
|
|
|
"messages": api_messages,
|
|
|
|
|
|
}
|
|
|
|
|
|
if self.max_tokens is not None:
|
2026-02-28 21:47:51 -08:00
|
|
|
|
summary_kwargs.update(self._max_tokens_param(self.max_tokens))
|
2026-03-01 18:24:27 -08:00
|
|
|
|
|
|
|
|
|
|
# Include provider routing preferences
|
|
|
|
|
|
provider_preferences = {}
|
|
|
|
|
|
if self.providers_allowed:
|
|
|
|
|
|
provider_preferences["only"] = self.providers_allowed
|
|
|
|
|
|
if self.providers_ignored:
|
|
|
|
|
|
provider_preferences["ignore"] = self.providers_ignored
|
|
|
|
|
|
if self.providers_order:
|
|
|
|
|
|
provider_preferences["order"] = self.providers_order
|
|
|
|
|
|
if self.provider_sort:
|
|
|
|
|
|
provider_preferences["sort"] = self.provider_sort
|
|
|
|
|
|
if provider_preferences:
|
|
|
|
|
|
summary_extra_body["provider"] = provider_preferences
|
|
|
|
|
|
|
2026-02-25 18:20:38 -08:00
|
|
|
|
if summary_extra_body:
|
|
|
|
|
|
summary_kwargs["extra_body"] = summary_extra_body
|
|
|
|
|
|
|
|
|
|
|
|
summary_response = self.client.chat.completions.create(**summary_kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
if summary_response.choices and summary_response.choices[0].message.content:
|
|
|
|
|
|
final_response = summary_response.choices[0].message.content
|
2026-02-28 21:47:51 -08:00
|
|
|
|
else:
|
|
|
|
|
|
final_response = ""
|
|
|
|
|
|
|
|
|
|
|
|
if final_response:
|
|
|
|
|
|
if "<think>" in final_response:
|
|
|
|
|
|
final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
|
|
|
|
|
|
if final_response:
|
|
|
|
|
|
messages.append({"role": "assistant", "content": final_response})
|
|
|
|
|
|
else:
|
|
|
|
|
|
final_response = "I reached the iteration limit and couldn't generate a summary."
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Retry summary generation
|
|
|
|
|
|
if self.api_mode == "codex_responses":
|
|
|
|
|
|
codex_kwargs = self._build_api_kwargs(api_messages)
|
2026-03-03 03:42:44 -08:00
|
|
|
|
codex_kwargs.pop("tools", None)
|
2026-02-28 21:47:51 -08:00
|
|
|
|
retry_response = self._run_codex_stream(codex_kwargs)
|
|
|
|
|
|
retry_msg, _ = self._normalize_codex_response(retry_response)
|
|
|
|
|
|
final_response = (retry_msg.content or "").strip() if retry_msg else ""
|
|
|
|
|
|
else:
|
|
|
|
|
|
summary_kwargs = {
|
|
|
|
|
|
"model": self.model,
|
|
|
|
|
|
"messages": api_messages,
|
|
|
|
|
|
}
|
|
|
|
|
|
if self.max_tokens is not None:
|
2026-03-05 17:49:37 +03:00
|
|
|
|
summary_kwargs.update(self._max_tokens_param(self.max_tokens))
|
2026-02-28 21:47:51 -08:00
|
|
|
|
if summary_extra_body:
|
|
|
|
|
|
summary_kwargs["extra_body"] = summary_extra_body
|
|
|
|
|
|
|
|
|
|
|
|
summary_response = self.client.chat.completions.create(**summary_kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
if summary_response.choices and summary_response.choices[0].message.content:
|
|
|
|
|
|
final_response = summary_response.choices[0].message.content
|
|
|
|
|
|
else:
|
|
|
|
|
|
final_response = ""
|
|
|
|
|
|
|
|
|
|
|
|
if final_response:
|
2026-02-25 18:20:38 -08:00
|
|
|
|
if "<think>" in final_response:
|
|
|
|
|
|
final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
|
2026-03-05 18:57:59 +03:00
|
|
|
|
if final_response:
|
|
|
|
|
|
messages.append({"role": "assistant", "content": final_response})
|
|
|
|
|
|
else:
|
|
|
|
|
|
final_response = "I reached the iteration limit and couldn't generate a summary."
|
2026-02-25 18:20:38 -08:00
|
|
|
|
else:
|
|
|
|
|
|
final_response = "I reached the iteration limit and couldn't generate a summary."
|
2026-02-21 04:17:27 -08:00
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logging.warning(f"Failed to get summary response: {e}")
|
|
|
|
|
|
final_response = f"I reached the maximum iterations ({self.max_iterations}) but couldn't summarize. Error: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
return final_response
|
|
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
def run_conversation(
|
2025-11-04 03:32:43 -05:00
|
|
|
|
self,
|
|
|
|
|
|
user_message: str,
|
|
|
|
|
|
system_message: str = None,
|
|
|
|
|
|
conversation_history: List[Dict[str, Any]] = None,
|
|
|
|
|
|
task_id: str = None
|
2025-09-10 00:43:55 -07:00
|
|
|
|
) -> Dict[str, Any]:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Run a complete conversation with tool calling until completion.
|
2025-11-04 03:32:43 -05:00
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
Args:
|
|
|
|
|
|
user_message (str): The user's message/question
|
2025-10-08 02:33:58 +00:00
|
|
|
|
system_message (str): Custom system message (optional, overrides ephemeral_system_prompt if provided)
|
2025-09-10 00:43:55 -07:00
|
|
|
|
conversation_history (List[Dict]): Previous conversation messages (optional)
|
2025-11-04 03:32:43 -05:00
|
|
|
|
task_id (str): Unique identifier for this task to isolate VMs between concurrent tasks (optional, auto-generated if not provided)
|
|
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
Returns:
|
|
|
|
|
|
Dict: Complete conversation result with final response and message history
|
|
|
|
|
|
"""
|
2025-11-04 03:32:43 -05:00
|
|
|
|
# Generate unique task_id if not provided to isolate VMs between concurrent tasks
|
|
|
|
|
|
effective_task_id = task_id or str(uuid.uuid4())
|
2026-01-10 13:04:43 +00:00
|
|
|
|
|
2026-03-07 10:32:51 -08:00
|
|
|
|
# Reset retry counters and iteration budget at the start of each turn
|
|
|
|
|
|
# so subagent usage from a previous turn doesn't eat into the next one.
|
2026-01-10 13:04:43 +00:00
|
|
|
|
self._invalid_tool_retries = 0
|
|
|
|
|
|
self._invalid_json_retries = 0
|
|
|
|
|
|
self._empty_content_retries = 0
|
2026-03-07 20:12:08 +03:00
|
|
|
|
self._incomplete_scratchpad_retries = 0
|
|
|
|
|
|
self._codex_incomplete_retries = 0
|
2026-02-22 21:33:28 -08:00
|
|
|
|
self._last_content_with_tools = None
|
|
|
|
|
|
self._turns_since_memory = 0
|
|
|
|
|
|
self._iters_since_skill = 0
|
2026-03-07 10:32:51 -08:00
|
|
|
|
self.iteration_budget = IterationBudget(self.max_iterations)
|
2026-01-10 13:04:43 +00:00
|
|
|
|
|
2026-03-01 03:06:13 +03:00
|
|
|
|
# Initialize conversation (copy to avoid mutating the caller's list)
|
|
|
|
|
|
messages = list(conversation_history) if conversation_history else []
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
2026-02-17 17:02:33 -08:00
|
|
|
|
# Hydrate todo store from conversation history (gateway creates a fresh
|
|
|
|
|
|
# AIAgent per message, so the in-memory store is empty -- we need to
|
|
|
|
|
|
# recover the todo state from the most recent todo tool response in history)
|
|
|
|
|
|
if conversation_history and not self._todo_store.has_items():
|
|
|
|
|
|
self._hydrate_todo_store(conversation_history)
|
|
|
|
|
|
|
2026-02-23 23:55:42 -08:00
|
|
|
|
# Prefill messages (few-shot priming) are injected at API-call time only,
|
|
|
|
|
|
# never stored in the messages list. This keeps them ephemeral: they won't
|
|
|
|
|
|
# be saved to session DB, session logs, or batch trajectories, but they're
|
|
|
|
|
|
# automatically re-applied on every API call (including session continuations).
|
2026-02-08 10:49:24 +00:00
|
|
|
|
|
2026-02-22 10:15:17 -08:00
|
|
|
|
# Track user turns for memory flush and periodic nudge logic
|
|
|
|
|
|
self._user_turn_count += 1
|
|
|
|
|
|
|
2026-02-25 19:34:25 -05:00
|
|
|
|
# Preserve the original user message before nudge injection.
|
|
|
|
|
|
# Honcho should receive the actual user input, not system nudges.
|
|
|
|
|
|
original_user_message = user_message
|
|
|
|
|
|
|
2026-02-22 21:33:28 -08:00
|
|
|
|
# Periodic memory nudge: remind the model to consider saving memories.
|
|
|
|
|
|
# Counter resets whenever the memory tool is actually used.
|
2026-02-22 10:15:17 -08:00
|
|
|
|
if (self._memory_nudge_interval > 0
|
|
|
|
|
|
and "memory" in self.valid_tool_names
|
|
|
|
|
|
and self._memory_store):
|
2026-02-22 21:33:28 -08:00
|
|
|
|
self._turns_since_memory += 1
|
|
|
|
|
|
if self._turns_since_memory >= self._memory_nudge_interval:
|
|
|
|
|
|
user_message += (
|
|
|
|
|
|
"\n\n[System: You've had several exchanges in this session. "
|
|
|
|
|
|
"Consider whether there's anything worth saving to your memories.]"
|
|
|
|
|
|
)
|
|
|
|
|
|
self._turns_since_memory = 0
|
|
|
|
|
|
|
|
|
|
|
|
# Skill creation nudge: fires on the first user message after a long tool loop.
|
|
|
|
|
|
# The counter increments per API iteration in the tool loop and is checked here.
|
|
|
|
|
|
if (self._skill_nudge_interval > 0
|
|
|
|
|
|
and self._iters_since_skill >= self._skill_nudge_interval
|
|
|
|
|
|
and "skill_manage" in self.valid_tool_names):
|
2026-02-22 10:15:17 -08:00
|
|
|
|
user_message += (
|
2026-02-22 21:33:28 -08:00
|
|
|
|
"\n\n[System: The previous task involved many steps. "
|
|
|
|
|
|
"If you discovered a reusable workflow, consider saving it as a skill.]"
|
2026-02-22 10:15:17 -08:00
|
|
|
|
)
|
2026-02-22 21:33:28 -08:00
|
|
|
|
self._iters_since_skill = 0
|
2026-02-22 10:15:17 -08:00
|
|
|
|
|
2026-03-09 01:50:58 -07:00
|
|
|
|
# Honcho prefetch: retrieve user context for system prompt injection.
|
|
|
|
|
|
# Only on the FIRST turn of a session (empty history). On subsequent
|
|
|
|
|
|
# turns the model already has all prior context in its conversation
|
|
|
|
|
|
# history, and the Honcho context is baked into the stored system
|
|
|
|
|
|
# prompt — re-fetching it would change the system message and break
|
|
|
|
|
|
# Anthropic prompt caching.
|
2026-02-25 19:34:25 -05:00
|
|
|
|
self._honcho_context = ""
|
2026-03-09 01:50:58 -07:00
|
|
|
|
if self._honcho and self._honcho_session_key and not conversation_history:
|
2026-02-25 19:34:25 -05:00
|
|
|
|
try:
|
|
|
|
|
|
self._honcho_context = self._honcho_prefetch(user_message)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.debug("Honcho prefetch failed (non-fatal): %s", e)
|
|
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
# Add user message
|
2026-02-22 17:10:24 -08:00
|
|
|
|
user_msg = {"role": "user", "content": user_message}
|
|
|
|
|
|
messages.append(user_msg)
|
|
|
|
|
|
self._log_msg_to_db(user_msg)
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
2026-01-31 06:30:48 +00:00
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
2026-02-19 00:57:31 -08:00
|
|
|
|
# ── System prompt (cached per session for prefix caching) ──
|
|
|
|
|
|
# Built once on first call, reused for all subsequent calls.
|
|
|
|
|
|
# Only rebuilt after context compression events (which invalidate
|
|
|
|
|
|
# the cache and reload memory from disk).
|
2026-03-09 01:50:58 -07:00
|
|
|
|
#
|
|
|
|
|
|
# For continuing sessions (gateway creates a fresh AIAgent per
|
|
|
|
|
|
# message), we load the stored system prompt from the session DB
|
|
|
|
|
|
# instead of rebuilding. Rebuilding would pick up memory changes
|
|
|
|
|
|
# from disk that the model already knows about (it wrote them!),
|
|
|
|
|
|
# producing a different system prompt and breaking the Anthropic
|
|
|
|
|
|
# prefix cache.
|
2026-02-19 00:57:31 -08:00
|
|
|
|
if self._cached_system_prompt is None:
|
2026-03-09 01:50:58 -07:00
|
|
|
|
stored_prompt = None
|
|
|
|
|
|
if conversation_history and self._session_db:
|
2026-02-19 00:57:31 -08:00
|
|
|
|
try:
|
2026-03-09 01:50:58 -07:00
|
|
|
|
session_row = self._session_db.get_session(self.session_id)
|
|
|
|
|
|
if session_row:
|
|
|
|
|
|
stored_prompt = session_row.get("system_prompt") or None
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass # Fall through to build fresh
|
|
|
|
|
|
|
|
|
|
|
|
if stored_prompt:
|
|
|
|
|
|
# Continuing session — reuse the exact system prompt from
|
|
|
|
|
|
# the previous turn so the Anthropic cache prefix matches.
|
|
|
|
|
|
self._cached_system_prompt = stored_prompt
|
|
|
|
|
|
else:
|
|
|
|
|
|
# First turn of a new session — build from scratch.
|
|
|
|
|
|
self._cached_system_prompt = self._build_system_prompt(system_message)
|
|
|
|
|
|
# Bake Honcho context into the prompt so it's stable for
|
|
|
|
|
|
# the entire session (not re-fetched per turn).
|
|
|
|
|
|
if self._honcho_context:
|
|
|
|
|
|
self._cached_system_prompt = (
|
|
|
|
|
|
self._cached_system_prompt + "\n\n" + self._honcho_context
|
|
|
|
|
|
).strip()
|
|
|
|
|
|
# Store the system prompt snapshot in SQLite
|
|
|
|
|
|
if self._session_db:
|
|
|
|
|
|
try:
|
|
|
|
|
|
self._session_db.update_system_prompt(self.session_id, self._cached_system_prompt)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
logger.debug("Session DB update_system_prompt failed: %s", e)
|
2026-02-12 16:11:16 -08:00
|
|
|
|
|
2026-02-19 00:57:31 -08:00
|
|
|
|
active_system_prompt = self._cached_system_prompt
|
2026-02-12 16:11:16 -08:00
|
|
|
|
|
2026-03-04 14:42:41 -08:00
|
|
|
|
# ── Preflight context compression ──
|
|
|
|
|
|
# Before entering the main loop, check if the loaded conversation
|
|
|
|
|
|
# history already exceeds the model's context threshold. This handles
|
|
|
|
|
|
# cases where a user switches to a model with a smaller context window
|
|
|
|
|
|
# while having a large existing session — compress proactively rather
|
|
|
|
|
|
# than waiting for an API error (which might be caught as a non-retryable
|
|
|
|
|
|
# 4xx and abort the request entirely).
|
|
|
|
|
|
if (
|
|
|
|
|
|
self.compression_enabled
|
|
|
|
|
|
and len(messages) > self.context_compressor.protect_first_n
|
|
|
|
|
|
+ self.context_compressor.protect_last_n + 1
|
|
|
|
|
|
):
|
|
|
|
|
|
_sys_tok_est = estimate_tokens_rough(active_system_prompt or "")
|
|
|
|
|
|
_msg_tok_est = estimate_messages_tokens_rough(messages)
|
|
|
|
|
|
_preflight_tokens = _sys_tok_est + _msg_tok_est
|
|
|
|
|
|
|
|
|
|
|
|
if _preflight_tokens >= self.context_compressor.threshold_tokens:
|
|
|
|
|
|
logger.info(
|
|
|
|
|
|
"Preflight compression: ~%s tokens >= %s threshold (model %s, ctx %s)",
|
|
|
|
|
|
f"{_preflight_tokens:,}",
|
|
|
|
|
|
f"{self.context_compressor.threshold_tokens:,}",
|
|
|
|
|
|
self.model,
|
|
|
|
|
|
f"{self.context_compressor.context_length:,}",
|
|
|
|
|
|
)
|
|
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
print(
|
|
|
|
|
|
f"📦 Preflight compression: ~{_preflight_tokens:,} tokens "
|
|
|
|
|
|
f">= {self.context_compressor.threshold_tokens:,} threshold"
|
|
|
|
|
|
)
|
|
|
|
|
|
# May need multiple passes for very large sessions with small
|
|
|
|
|
|
# context windows (each pass summarises the middle N turns).
|
|
|
|
|
|
for _pass in range(3):
|
|
|
|
|
|
_orig_len = len(messages)
|
|
|
|
|
|
messages, active_system_prompt = self._compress_context(
|
|
|
|
|
|
messages, system_message, approx_tokens=_preflight_tokens
|
|
|
|
|
|
)
|
|
|
|
|
|
if len(messages) >= _orig_len:
|
|
|
|
|
|
break # Cannot compress further
|
|
|
|
|
|
# Re-estimate after compression
|
|
|
|
|
|
_sys_tok_est = estimate_tokens_rough(active_system_prompt or "")
|
|
|
|
|
|
_msg_tok_est = estimate_messages_tokens_rough(messages)
|
|
|
|
|
|
_preflight_tokens = _sys_tok_est + _msg_tok_est
|
|
|
|
|
|
if _preflight_tokens < self.context_compressor.threshold_tokens:
|
|
|
|
|
|
break # Under threshold
|
|
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
# Main conversation loop
|
|
|
|
|
|
api_call_count = 0
|
|
|
|
|
|
final_response = None
|
2026-02-03 16:15:49 -08:00
|
|
|
|
interrupted = False
|
2026-02-25 18:11:26 -08:00
|
|
|
|
codex_ack_continuations = 0
|
2026-03-07 18:45:17 -05:00
|
|
|
|
length_continue_retries = 0
|
|
|
|
|
|
truncated_response_prefix = ""
|
2026-02-03 16:15:49 -08:00
|
|
|
|
|
|
|
|
|
|
# Clear any stale interrupt state at start
|
|
|
|
|
|
self.clear_interrupt()
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
2026-03-07 08:16:37 -08:00
|
|
|
|
while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
|
feat: filesystem checkpoints and /rollback command
Automatic filesystem snapshots before destructive file operations,
with user-facing rollback. Inspired by PR #559 (by @alireza78a).
Architecture:
- Shadow git repos at ~/.hermes/checkpoints/{hash}/ via GIT_DIR
- CheckpointManager: take/list/restore, turn-scoped dedup, pruning
- Transparent — the LLM never sees it, no tool schema, no tokens
- Once per turn — only first write_file/patch triggers a snapshot
Integration:
- Config: checkpoints.enabled + checkpoints.max_snapshots
- CLI flag: hermes --checkpoints
- Trigger: run_agent.py _execute_tool_calls() before write_file/patch
- /rollback slash command in CLI + gateway (list, restore by number)
- Pre-rollback snapshot auto-created on restore (undo the undo)
Safety:
- Never blocks file operations — all errors silently logged
- Skips root dir, home dir, dirs >50K files
- Disables gracefully when git not installed
- Shadow repo completely isolated from project git
Tests: 35 new tests, all passing (2798 total suite)
Docs: feature page, config reference, CLI commands reference
2026-03-10 00:49:15 -07:00
|
|
|
|
# Reset per-turn checkpoint dedup so each iteration can take one snapshot
|
|
|
|
|
|
self._checkpoint_mgr.new_turn()
|
|
|
|
|
|
|
2026-02-03 16:15:49 -08:00
|
|
|
|
# Check for interrupt request (e.g., user sent new message)
|
|
|
|
|
|
if self._interrupt_requested:
|
|
|
|
|
|
interrupted = True
|
|
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
print(f"\n⚡ Breaking out of tool loop due to interrupt...")
|
|
|
|
|
|
break
|
|
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
api_call_count += 1
|
2026-03-07 08:16:37 -08:00
|
|
|
|
if not self.iteration_budget.consume():
|
|
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
print(f"\n⚠️ Session iteration budget exhausted ({self.iteration_budget.max_total} total across agent + subagents)")
|
|
|
|
|
|
break
|
2026-02-22 13:28:13 -08:00
|
|
|
|
|
2026-02-28 17:09:26 -08:00
|
|
|
|
# Fire step_callback for gateway hooks (agent:step event)
|
|
|
|
|
|
if self.step_callback is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
prev_tools = []
|
|
|
|
|
|
for _m in reversed(messages):
|
|
|
|
|
|
if _m.get("role") == "assistant" and _m.get("tool_calls"):
|
|
|
|
|
|
prev_tools = [
|
|
|
|
|
|
tc["function"]["name"]
|
|
|
|
|
|
for tc in _m["tool_calls"]
|
|
|
|
|
|
if isinstance(tc, dict)
|
|
|
|
|
|
]
|
|
|
|
|
|
break
|
|
|
|
|
|
self.step_callback(api_call_count, prev_tools)
|
|
|
|
|
|
except Exception as _step_err:
|
|
|
|
|
|
logger.debug("step_callback error (iteration %s): %s", api_call_count, _step_err)
|
|
|
|
|
|
|
2026-02-22 21:33:28 -08:00
|
|
|
|
# Track tool-calling iterations for skill nudge.
|
|
|
|
|
|
# Counter resets whenever skill_manage is actually used.
|
2026-02-22 13:28:13 -08:00
|
|
|
|
if (self._skill_nudge_interval > 0
|
|
|
|
|
|
and "skill_manage" in self.valid_tool_names):
|
2026-02-22 21:33:28 -08:00
|
|
|
|
self._iters_since_skill += 1
|
2026-01-08 08:57:51 +00:00
|
|
|
|
|
|
|
|
|
|
# Prepare messages for API call
|
|
|
|
|
|
# If we have an ephemeral system prompt, prepend it to the messages
|
2026-01-29 06:10:24 +00:00
|
|
|
|
# Note: Reasoning is embedded in content via <think> tags for trajectory storage.
|
|
|
|
|
|
# However, providers like Moonshot AI require a separate 'reasoning_content' field
|
|
|
|
|
|
# on assistant messages with tool_calls. We handle both cases here.
|
|
|
|
|
|
api_messages = []
|
|
|
|
|
|
for msg in messages:
|
|
|
|
|
|
api_msg = msg.copy()
|
2026-03-07 15:13:45 -05:00
|
|
|
|
|
2026-02-01 22:48:18 -08:00
|
|
|
|
# For ALL assistant messages, pass reasoning back to the API
|
|
|
|
|
|
# This ensures multi-turn reasoning context is preserved
|
|
|
|
|
|
if msg.get("role") == "assistant":
|
2026-01-29 06:10:24 +00:00
|
|
|
|
reasoning_text = msg.get("reasoning")
|
|
|
|
|
|
if reasoning_text:
|
2026-02-01 22:48:18 -08:00
|
|
|
|
# Add reasoning_content for API compatibility (Moonshot AI, Novita, OpenRouter)
|
2026-01-29 06:10:24 +00:00
|
|
|
|
api_msg["reasoning_content"] = reasoning_text
|
2026-03-07 15:13:45 -05:00
|
|
|
|
|
2026-01-29 06:10:24 +00:00
|
|
|
|
# Remove 'reasoning' field - it's for trajectory storage only
|
2026-02-01 22:48:18 -08:00
|
|
|
|
# We've copied it to 'reasoning_content' for the API above
|
2026-01-29 06:10:24 +00:00
|
|
|
|
if "reasoning" in api_msg:
|
|
|
|
|
|
api_msg.pop("reasoning")
|
2026-03-02 11:35:03 +03:00
|
|
|
|
# Remove finish_reason - not accepted by strict APIs (e.g. Mistral)
|
|
|
|
|
|
if "finish_reason" in api_msg:
|
|
|
|
|
|
api_msg.pop("finish_reason")
|
2026-02-01 22:48:18 -08:00
|
|
|
|
# Keep 'reasoning_details' - OpenRouter uses this for multi-turn reasoning context
|
|
|
|
|
|
# The signature field helps maintain reasoning continuity
|
2026-01-29 06:10:24 +00:00
|
|
|
|
api_messages.append(api_msg)
|
2026-03-07 15:13:45 -05:00
|
|
|
|
|
2026-02-23 23:55:42 -08:00
|
|
|
|
# Build the final system message: cached prompt + ephemeral system prompt.
|
|
|
|
|
|
# The ephemeral part is appended here (not baked into the cached prompt)
|
|
|
|
|
|
# so it stays out of the session DB and logs.
|
2026-03-09 01:50:58 -07:00
|
|
|
|
# Note: Honcho context is baked into _cached_system_prompt on the first
|
|
|
|
|
|
# turn and stored in the session DB, so it does NOT need to be injected
|
|
|
|
|
|
# here. This keeps the system message identical across all turns in a
|
|
|
|
|
|
# session, maximizing Anthropic prompt cache hits.
|
2026-02-23 23:55:42 -08:00
|
|
|
|
effective_system = active_system_prompt or ""
|
|
|
|
|
|
if self.ephemeral_system_prompt:
|
|
|
|
|
|
effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
|
|
|
|
|
|
if effective_system:
|
|
|
|
|
|
api_messages = [{"role": "system", "content": effective_system}] + api_messages
|
2026-03-07 15:13:45 -05:00
|
|
|
|
|
2026-02-23 23:55:42 -08:00
|
|
|
|
# Inject ephemeral prefill messages right after the system prompt
|
|
|
|
|
|
# but before conversation history. Same API-call-time-only pattern.
|
|
|
|
|
|
if self.prefill_messages:
|
|
|
|
|
|
sys_offset = 1 if effective_system else 0
|
|
|
|
|
|
for idx, pfm in enumerate(self.prefill_messages):
|
|
|
|
|
|
api_messages.insert(sys_offset + idx, pfm.copy())
|
2026-03-07 15:13:45 -05:00
|
|
|
|
|
2026-02-10 06:49:41 +00:00
|
|
|
|
# Apply Anthropic prompt caching for Claude models via OpenRouter.
|
|
|
|
|
|
# Auto-detected: if model name contains "claude" and base_url is OpenRouter,
|
|
|
|
|
|
# inject cache_control breakpoints (system + last 3 messages) to reduce
|
|
|
|
|
|
# input token costs by ~75% on multi-turn conversations.
|
|
|
|
|
|
if self._use_prompt_caching:
|
|
|
|
|
|
api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl)
|
2026-03-07 15:13:45 -05:00
|
|
|
|
|
2026-03-07 08:08:00 -08:00
|
|
|
|
# Safety net: strip orphaned tool results / add stubs for missing
|
|
|
|
|
|
# results before sending to the API. The compressor handles this
|
|
|
|
|
|
# during compression, but orphans can also sneak in from session
|
|
|
|
|
|
# loading or manual message manipulation.
|
|
|
|
|
|
if hasattr(self, 'context_compressor') and self.context_compressor:
|
|
|
|
|
|
api_messages = self.context_compressor._sanitize_tool_pairs(api_messages)
|
|
|
|
|
|
|
2026-01-08 08:57:51 +00:00
|
|
|
|
# Calculate approximate request size for logging
|
|
|
|
|
|
total_chars = sum(len(str(msg)) for msg in api_messages)
|
|
|
|
|
|
approx_tokens = total_chars // 4 # Rough estimate: 4 chars per token
|
|
|
|
|
|
|
2026-01-31 06:30:48 +00:00
|
|
|
|
# Thinking spinner for quiet mode (animated during API call)
|
|
|
|
|
|
thinking_spinner = None
|
|
|
|
|
|
|
|
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
print(f"\n{self.log_prefix}🔄 Making API call #{api_call_count}/{self.max_iterations}...")
|
|
|
|
|
|
print(f"{self.log_prefix} 📊 Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)")
|
|
|
|
|
|
print(f"{self.log_prefix} 🔧 Available tools: {len(self.tools) if self.tools else 0}")
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Animated thinking spinner in quiet mode
|
|
|
|
|
|
face = random.choice(KawaiiSpinner.KAWAII_THINKING)
|
|
|
|
|
|
verb = random.choice(KawaiiSpinner.THINKING_VERBS)
|
2026-03-09 23:26:43 -07:00
|
|
|
|
if self.thinking_callback:
|
|
|
|
|
|
# CLI TUI mode: use prompt_toolkit widget instead of raw spinner
|
|
|
|
|
|
self.thinking_callback(f"{face} {verb}...")
|
|
|
|
|
|
else:
|
|
|
|
|
|
spinner_type = random.choice(['brain', 'sparkle', 'pulse', 'moon', 'star'])
|
|
|
|
|
|
thinking_spinner = KawaiiSpinner(f"{face} {verb}...", spinner_type=spinner_type)
|
|
|
|
|
|
thinking_spinner.start()
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
2025-09-10 00:51:41 -07:00
|
|
|
|
# Log request details if verbose
|
|
|
|
|
|
if self.verbose_logging:
|
|
|
|
|
|
logging.debug(f"API Request - Model: {self.model}, Messages: {len(messages)}, Tools: {len(self.tools) if self.tools else 0}")
|
|
|
|
|
|
logging.debug(f"Last message role: {messages[-1]['role'] if messages else 'none'}")
|
2026-01-08 08:57:51 +00:00
|
|
|
|
logging.debug(f"Total message size: ~{approx_tokens:,} tokens")
|
2025-09-10 00:51:41 -07:00
|
|
|
|
|
|
|
|
|
|
api_start_time = time.time()
|
|
|
|
|
|
retry_count = 0
|
2025-11-17 18:35:36 -05:00
|
|
|
|
max_retries = 6 # Increased to allow longer backoff periods
|
2026-03-07 09:19:07 -08:00
|
|
|
|
compression_attempts = 0
|
|
|
|
|
|
max_compression_attempts = 3
|
2026-02-25 19:27:54 -08:00
|
|
|
|
codex_auth_retry_attempted = False
|
2026-03-07 13:34:23 +11:00
|
|
|
|
nous_auth_retry_attempted = False
|
2026-03-07 15:13:45 -05:00
|
|
|
|
restart_with_compressed_messages = False
|
2026-03-07 18:45:17 -05:00
|
|
|
|
restart_with_length_continuation = False
|
2025-11-17 18:35:36 -05:00
|
|
|
|
|
2026-02-25 18:20:38 -08:00
|
|
|
|
finish_reason = "stop"
|
2026-03-07 09:19:07 -08:00
|
|
|
|
response = None # Guard against UnboundLocalError if all retries fail
|
2026-02-25 18:20:38 -08:00
|
|
|
|
|
2026-02-27 03:21:49 -08:00
|
|
|
|
while retry_count < max_retries:
|
2025-09-10 00:51:41 -07:00
|
|
|
|
try:
|
2026-02-21 04:17:27 -08:00
|
|
|
|
api_kwargs = self._build_api_kwargs(api_messages)
|
2026-02-25 19:27:54 -08:00
|
|
|
|
if self.api_mode == "codex_responses":
|
|
|
|
|
|
api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False)
|
2026-02-20 17:24:00 -08:00
|
|
|
|
|
|
|
|
|
|
if os.getenv("HERMES_DUMP_REQUESTS", "").strip().lower() in {"1", "true", "yes", "on"}:
|
|
|
|
|
|
self._dump_api_request_debug(api_kwargs, reason="preflight")
|
|
|
|
|
|
|
2026-02-21 12:15:40 -08:00
|
|
|
|
response = self._interruptible_api_call(api_kwargs)
|
2025-09-10 00:51:41 -07:00
|
|
|
|
|
|
|
|
|
|
api_duration = time.time() - api_start_time
|
2026-01-31 06:30:48 +00:00
|
|
|
|
|
2026-02-19 01:56:04 -08:00
|
|
|
|
# Stop thinking spinner silently -- the response box or tool
|
|
|
|
|
|
# execution messages that follow are more informative.
|
2026-01-31 06:30:48 +00:00
|
|
|
|
if thinking_spinner:
|
2026-02-19 01:56:04 -08:00
|
|
|
|
thinking_spinner.stop("")
|
2026-01-31 06:30:48 +00:00
|
|
|
|
thinking_spinner = None
|
2026-03-09 23:26:43 -07:00
|
|
|
|
if self.thinking_callback:
|
|
|
|
|
|
self.thinking_callback("")
|
2026-01-31 06:30:48 +00:00
|
|
|
|
|
|
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
print(f"{self.log_prefix}⏱️ API call completed in {api_duration:.2f}s")
|
2025-09-10 00:51:41 -07:00
|
|
|
|
|
|
|
|
|
|
if self.verbose_logging:
|
2026-01-29 06:10:24 +00:00
|
|
|
|
# Log response with provider info if available
|
|
|
|
|
|
resp_model = getattr(response, 'model', 'N/A') if response else 'N/A'
|
|
|
|
|
|
logging.debug(f"API Response received - Model: {resp_model}, Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
|
2026-02-08 10:49:24 +00:00
|
|
|
|
|
2026-02-25 18:20:38 -08:00
|
|
|
|
# Validate response shape before proceeding
|
|
|
|
|
|
response_invalid = False
|
|
|
|
|
|
error_details = []
|
|
|
|
|
|
if self.api_mode == "codex_responses":
|
|
|
|
|
|
output_items = getattr(response, "output", None) if response is not None else None
|
|
|
|
|
|
if response is None:
|
|
|
|
|
|
response_invalid = True
|
|
|
|
|
|
error_details.append("response is None")
|
|
|
|
|
|
elif not isinstance(output_items, list):
|
|
|
|
|
|
response_invalid = True
|
|
|
|
|
|
error_details.append("response.output is not a list")
|
|
|
|
|
|
elif len(output_items) == 0:
|
|
|
|
|
|
response_invalid = True
|
|
|
|
|
|
error_details.append("response.output is empty")
|
|
|
|
|
|
else:
|
|
|
|
|
|
if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0:
|
|
|
|
|
|
response_invalid = True
|
|
|
|
|
|
if response is None:
|
|
|
|
|
|
error_details.append("response is None")
|
|
|
|
|
|
elif not hasattr(response, 'choices'):
|
|
|
|
|
|
error_details.append("response has no 'choices' attribute")
|
|
|
|
|
|
elif response.choices is None:
|
|
|
|
|
|
error_details.append("response.choices is None")
|
|
|
|
|
|
else:
|
|
|
|
|
|
error_details.append("response.choices is empty")
|
|
|
|
|
|
|
|
|
|
|
|
if response_invalid:
|
2026-01-31 06:30:48 +00:00
|
|
|
|
# Stop spinner before printing error messages
|
|
|
|
|
|
if thinking_spinner:
|
|
|
|
|
|
thinking_spinner.stop(f"(´;ω;`) oops, retrying...")
|
|
|
|
|
|
thinking_spinner = None
|
2026-03-09 23:26:43 -07:00
|
|
|
|
if self.thinking_callback:
|
|
|
|
|
|
self.thinking_callback("")
|
2026-01-31 06:30:48 +00:00
|
|
|
|
|
2026-01-10 05:56:26 +00:00
|
|
|
|
# This is often rate limiting or provider returning malformed response
|
|
|
|
|
|
retry_count += 1
|
|
|
|
|
|
|
|
|
|
|
|
# Check for error field in response (some providers include this)
|
|
|
|
|
|
error_msg = "Unknown"
|
2026-01-29 06:10:24 +00:00
|
|
|
|
provider_name = "Unknown"
|
2026-01-10 05:56:26 +00:00
|
|
|
|
if response and hasattr(response, 'error') and response.error:
|
|
|
|
|
|
error_msg = str(response.error)
|
2026-01-29 06:10:24 +00:00
|
|
|
|
# Try to extract provider from error metadata
|
|
|
|
|
|
if hasattr(response.error, 'metadata') and response.error.metadata:
|
|
|
|
|
|
provider_name = response.error.metadata.get('provider_name', 'Unknown')
|
2026-01-10 05:56:26 +00:00
|
|
|
|
elif response and hasattr(response, 'message') and response.message:
|
|
|
|
|
|
error_msg = str(response.message)
|
|
|
|
|
|
|
2026-01-29 06:10:24 +00:00
|
|
|
|
# Try to get provider from model field (OpenRouter often returns actual model used)
|
|
|
|
|
|
if provider_name == "Unknown" and response and hasattr(response, 'model') and response.model:
|
|
|
|
|
|
provider_name = f"model={response.model}"
|
|
|
|
|
|
|
|
|
|
|
|
# Check for x-openrouter-provider or similar metadata
|
|
|
|
|
|
if provider_name == "Unknown" and response:
|
|
|
|
|
|
# Log all response attributes for debugging
|
|
|
|
|
|
resp_attrs = {k: str(v)[:100] for k, v in vars(response).items() if not k.startswith('_')}
|
|
|
|
|
|
if self.verbose_logging:
|
|
|
|
|
|
logging.debug(f"Response attributes for invalid response: {resp_attrs}")
|
|
|
|
|
|
|
2026-01-10 05:56:26 +00:00
|
|
|
|
print(f"{self.log_prefix}⚠️ Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}")
|
2026-01-29 06:10:24 +00:00
|
|
|
|
print(f"{self.log_prefix} 🏢 Provider: {provider_name}")
|
2026-01-10 05:56:26 +00:00
|
|
|
|
print(f"{self.log_prefix} 📝 Provider message: {error_msg[:200]}")
|
|
|
|
|
|
print(f"{self.log_prefix} ⏱️ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)")
|
|
|
|
|
|
|
2026-03-01 02:27:26 +03:00
|
|
|
|
if retry_count >= max_retries:
|
feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
2026-03-08 20:22:33 -07:00
|
|
|
|
# Try fallback before giving up
|
|
|
|
|
|
if self._try_activate_fallback():
|
|
|
|
|
|
retry_count = 0
|
|
|
|
|
|
continue
|
2026-01-10 05:56:26 +00:00
|
|
|
|
print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
|
|
|
|
|
|
logging.error(f"{self.log_prefix}Invalid API response after {max_retries} retries.")
|
2026-02-22 17:10:24 -08:00
|
|
|
|
self._persist_session(messages, conversation_history)
|
2026-01-10 05:56:26 +00:00
|
|
|
|
return {
|
|
|
|
|
|
"messages": messages,
|
|
|
|
|
|
"completed": False,
|
|
|
|
|
|
"api_calls": api_call_count,
|
2026-02-25 18:20:38 -08:00
|
|
|
|
"error": "Invalid API response shape. Likely rate limited or malformed provider response.",
|
2026-01-10 05:56:26 +00:00
|
|
|
|
"failed": True # Mark as failure for filtering
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# Longer backoff for rate limiting (likely cause of None choices)
|
|
|
|
|
|
wait_time = min(5 * (2 ** (retry_count - 1)), 120) # 5s, 10s, 20s, 40s, 80s, 120s
|
|
|
|
|
|
print(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...")
|
2026-01-29 06:10:24 +00:00
|
|
|
|
logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
|
2026-02-08 10:49:24 +00:00
|
|
|
|
|
|
|
|
|
|
# Sleep in small increments to stay responsive to interrupts
|
|
|
|
|
|
sleep_end = time.time() + wait_time
|
|
|
|
|
|
while time.time() < sleep_end:
|
|
|
|
|
|
if self._interrupt_requested:
|
|
|
|
|
|
print(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
|
2026-02-22 17:10:24 -08:00
|
|
|
|
self._persist_session(messages, conversation_history)
|
2026-02-27 03:21:49 -08:00
|
|
|
|
self.clear_interrupt()
|
2026-02-08 10:49:24 +00:00
|
|
|
|
return {
|
2026-03-08 18:58:23 -07:00
|
|
|
|
"final_response": f"Operation interrupted: retrying API call after rate limit (retry {retry_count}/{max_retries}).",
|
2026-02-08 10:49:24 +00:00
|
|
|
|
"messages": messages,
|
|
|
|
|
|
"api_calls": api_call_count,
|
|
|
|
|
|
"completed": False,
|
|
|
|
|
|
"interrupted": True,
|
|
|
|
|
|
}
|
|
|
|
|
|
time.sleep(0.2)
|
2026-01-10 05:56:26 +00:00
|
|
|
|
continue # Retry the API call
|
|
|
|
|
|
|
2026-01-10 13:04:43 +00:00
|
|
|
|
# Check finish_reason before proceeding
|
2026-02-25 18:20:38 -08:00
|
|
|
|
if self.api_mode == "codex_responses":
|
|
|
|
|
|
status = getattr(response, "status", None)
|
|
|
|
|
|
incomplete_details = getattr(response, "incomplete_details", None)
|
|
|
|
|
|
incomplete_reason = None
|
|
|
|
|
|
if isinstance(incomplete_details, dict):
|
|
|
|
|
|
incomplete_reason = incomplete_details.get("reason")
|
|
|
|
|
|
else:
|
|
|
|
|
|
incomplete_reason = getattr(incomplete_details, "reason", None)
|
|
|
|
|
|
if status == "incomplete" and incomplete_reason in {"max_output_tokens", "length"}:
|
|
|
|
|
|
finish_reason = "length"
|
|
|
|
|
|
else:
|
|
|
|
|
|
finish_reason = "stop"
|
|
|
|
|
|
else:
|
|
|
|
|
|
finish_reason = response.choices[0].finish_reason
|
2026-03-07 18:45:17 -05:00
|
|
|
|
|
2026-01-10 13:04:43 +00:00
|
|
|
|
if finish_reason == "length":
|
|
|
|
|
|
print(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens")
|
2026-03-07 18:45:17 -05:00
|
|
|
|
|
|
|
|
|
|
if self.api_mode == "chat_completions":
|
|
|
|
|
|
assistant_message = response.choices[0].message
|
|
|
|
|
|
if not assistant_message.tool_calls:
|
|
|
|
|
|
length_continue_retries += 1
|
|
|
|
|
|
interim_msg = self._build_assistant_message(assistant_message, finish_reason)
|
|
|
|
|
|
messages.append(interim_msg)
|
|
|
|
|
|
self._log_msg_to_db(interim_msg)
|
|
|
|
|
|
if assistant_message.content:
|
|
|
|
|
|
truncated_response_prefix += assistant_message.content
|
|
|
|
|
|
|
|
|
|
|
|
if length_continue_retries < 3:
|
|
|
|
|
|
print(
|
|
|
|
|
|
f"{self.log_prefix}↻ Requesting continuation "
|
|
|
|
|
|
f"({length_continue_retries}/3)..."
|
|
|
|
|
|
)
|
|
|
|
|
|
continue_msg = {
|
|
|
|
|
|
"role": "user",
|
|
|
|
|
|
"content": (
|
|
|
|
|
|
"[System: Your previous response was truncated by the output "
|
|
|
|
|
|
"length limit. Continue exactly where you left off. Do not "
|
|
|
|
|
|
"restart or repeat prior text. Finish the answer directly.]"
|
|
|
|
|
|
),
|
|
|
|
|
|
}
|
|
|
|
|
|
messages.append(continue_msg)
|
|
|
|
|
|
self._log_msg_to_db(continue_msg)
|
|
|
|
|
|
self._session_messages = messages
|
|
|
|
|
|
self._save_session_log(messages)
|
|
|
|
|
|
restart_with_length_continuation = True
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
partial_response = self._strip_think_blocks(truncated_response_prefix).strip()
|
|
|
|
|
|
self._cleanup_task_resources(effective_task_id)
|
|
|
|
|
|
self._persist_session(messages, conversation_history)
|
|
|
|
|
|
return {
|
|
|
|
|
|
"final_response": partial_response or None,
|
|
|
|
|
|
"messages": messages,
|
|
|
|
|
|
"api_calls": api_call_count,
|
|
|
|
|
|
"completed": False,
|
|
|
|
|
|
"partial": True,
|
|
|
|
|
|
"error": "Response remained truncated after 3 continuation attempts",
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-01-10 13:04:43 +00:00
|
|
|
|
# If we have prior messages, roll back to last complete state
|
|
|
|
|
|
if len(messages) > 1:
|
|
|
|
|
|
print(f"{self.log_prefix} ⏪ Rolling back to last complete assistant turn")
|
|
|
|
|
|
rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
|
2026-03-07 18:45:17 -05:00
|
|
|
|
|
2026-02-20 23:23:32 -08:00
|
|
|
|
self._cleanup_task_resources(effective_task_id)
|
2026-02-22 17:10:24 -08:00
|
|
|
|
self._persist_session(messages, conversation_history)
|
2026-03-07 18:45:17 -05:00
|
|
|
|
|
2026-01-10 13:04:43 +00:00
|
|
|
|
return {
|
|
|
|
|
|
"final_response": None,
|
|
|
|
|
|
"messages": rolled_back_messages,
|
|
|
|
|
|
"api_calls": api_call_count,
|
|
|
|
|
|
"completed": False,
|
|
|
|
|
|
"partial": True,
|
|
|
|
|
|
"error": "Response truncated due to output length limit"
|
|
|
|
|
|
}
|
|
|
|
|
|
else:
|
|
|
|
|
|
# First message was truncated - mark as failed
|
|
|
|
|
|
print(f"{self.log_prefix}❌ First response truncated - cannot recover")
|
2026-02-22 17:10:24 -08:00
|
|
|
|
self._persist_session(messages, conversation_history)
|
2026-01-10 13:04:43 +00:00
|
|
|
|
return {
|
|
|
|
|
|
"final_response": None,
|
|
|
|
|
|
"messages": messages,
|
|
|
|
|
|
"api_calls": api_call_count,
|
|
|
|
|
|
"completed": False,
|
|
|
|
|
|
"failed": True,
|
|
|
|
|
|
"error": "First response truncated due to output length limit"
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-01 18:01:31 -08:00
|
|
|
|
# Track actual token usage from response for context management
|
|
|
|
|
|
if hasattr(response, 'usage') and response.usage:
|
2026-02-25 18:20:38 -08:00
|
|
|
|
if self.api_mode == "codex_responses":
|
|
|
|
|
|
prompt_tokens = getattr(response.usage, 'input_tokens', 0) or 0
|
|
|
|
|
|
completion_tokens = getattr(response.usage, 'output_tokens', 0) or 0
|
|
|
|
|
|
total_tokens = (
|
|
|
|
|
|
getattr(response.usage, 'total_tokens', None)
|
|
|
|
|
|
or (prompt_tokens + completion_tokens)
|
|
|
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
|
|
|
prompt_tokens = getattr(response.usage, 'prompt_tokens', 0) or 0
|
|
|
|
|
|
completion_tokens = getattr(response.usage, 'completion_tokens', 0) or 0
|
|
|
|
|
|
total_tokens = getattr(response.usage, 'total_tokens', 0) or 0
|
2026-02-01 18:01:31 -08:00
|
|
|
|
usage_dict = {
|
2026-02-25 18:20:38 -08:00
|
|
|
|
"prompt_tokens": prompt_tokens,
|
|
|
|
|
|
"completion_tokens": completion_tokens,
|
|
|
|
|
|
"total_tokens": total_tokens,
|
2026-02-01 18:01:31 -08:00
|
|
|
|
}
|
|
|
|
|
|
self.context_compressor.update_from_response(usage_dict)
|
2026-03-01 00:23:19 -08:00
|
|
|
|
|
2026-03-05 16:09:57 -08:00
|
|
|
|
# Cache discovered context length after successful call
|
|
|
|
|
|
if self.context_compressor._context_probed:
|
|
|
|
|
|
ctx = self.context_compressor.context_length
|
|
|
|
|
|
save_context_length(self.model, self.base_url, ctx)
|
|
|
|
|
|
print(f"{self.log_prefix}💾 Cached context length: {ctx:,} tokens for {self.model}")
|
|
|
|
|
|
self.context_compressor._context_probed = False
|
|
|
|
|
|
|
2026-03-01 00:23:19 -08:00
|
|
|
|
self.session_prompt_tokens += prompt_tokens
|
|
|
|
|
|
self.session_completion_tokens += completion_tokens
|
|
|
|
|
|
self.session_total_tokens += total_tokens
|
|
|
|
|
|
self.session_api_calls += 1
|
2026-02-01 18:01:31 -08:00
|
|
|
|
|
|
|
|
|
|
if self.verbose_logging:
|
|
|
|
|
|
logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}")
|
2026-02-10 06:49:41 +00:00
|
|
|
|
|
|
|
|
|
|
# Log cache hit stats when prompt caching is active
|
|
|
|
|
|
if self._use_prompt_caching:
|
|
|
|
|
|
details = getattr(response.usage, 'prompt_tokens_details', None)
|
|
|
|
|
|
cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0
|
|
|
|
|
|
written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0
|
|
|
|
|
|
prompt = usage_dict["prompt_tokens"]
|
|
|
|
|
|
hit_pct = (cached / prompt * 100) if prompt > 0 else 0
|
|
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
print(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
|
2026-02-01 18:01:31 -08:00
|
|
|
|
|
2025-09-10 00:51:41 -07:00
|
|
|
|
break # Success, exit retry loop
|
2025-11-17 18:35:36 -05:00
|
|
|
|
|
2026-02-21 12:15:40 -08:00
|
|
|
|
except InterruptedError:
|
|
|
|
|
|
if thinking_spinner:
|
|
|
|
|
|
thinking_spinner.stop("")
|
|
|
|
|
|
thinking_spinner = None
|
2026-03-09 23:26:43 -07:00
|
|
|
|
if self.thinking_callback:
|
|
|
|
|
|
self.thinking_callback("")
|
2026-03-08 18:58:23 -07:00
|
|
|
|
api_elapsed = time.time() - api_start_time
|
2026-02-21 12:15:40 -08:00
|
|
|
|
print(f"{self.log_prefix}⚡ Interrupted during API call.")
|
2026-02-22 17:10:24 -08:00
|
|
|
|
self._persist_session(messages, conversation_history)
|
2026-02-21 12:15:40 -08:00
|
|
|
|
interrupted = True
|
2026-03-08 18:58:23 -07:00
|
|
|
|
final_response = f"Operation interrupted: waiting for model response ({api_elapsed:.1f}s elapsed)."
|
2026-02-21 12:15:40 -08:00
|
|
|
|
break
|
|
|
|
|
|
|
2025-09-10 00:51:41 -07:00
|
|
|
|
except Exception as api_error:
|
2026-01-31 06:30:48 +00:00
|
|
|
|
# Stop spinner before printing error messages
|
|
|
|
|
|
if thinking_spinner:
|
|
|
|
|
|
thinking_spinner.stop(f"(╥_╥) error, retrying...")
|
|
|
|
|
|
thinking_spinner = None
|
2026-03-09 23:26:43 -07:00
|
|
|
|
if self.thinking_callback:
|
|
|
|
|
|
self.thinking_callback("")
|
2026-02-25 19:27:54 -08:00
|
|
|
|
|
|
|
|
|
|
status_code = getattr(api_error, "status_code", None)
|
|
|
|
|
|
if (
|
|
|
|
|
|
self.api_mode == "codex_responses"
|
|
|
|
|
|
and self.provider == "openai-codex"
|
|
|
|
|
|
and status_code == 401
|
|
|
|
|
|
and not codex_auth_retry_attempted
|
|
|
|
|
|
):
|
|
|
|
|
|
codex_auth_retry_attempted = True
|
|
|
|
|
|
if self._try_refresh_codex_client_credentials(force=True):
|
|
|
|
|
|
print(f"{self.log_prefix}🔐 Codex auth refreshed after 401. Retrying request...")
|
|
|
|
|
|
continue
|
2026-03-07 13:34:23 +11:00
|
|
|
|
if (
|
|
|
|
|
|
self.api_mode == "chat_completions"
|
|
|
|
|
|
and self.provider == "nous"
|
|
|
|
|
|
and status_code == 401
|
|
|
|
|
|
and not nous_auth_retry_attempted
|
|
|
|
|
|
):
|
|
|
|
|
|
nous_auth_retry_attempted = True
|
|
|
|
|
|
if self._try_refresh_nous_client_credentials(force=True):
|
|
|
|
|
|
print(f"{self.log_prefix}🔐 Nous agent key refreshed after 401. Retrying request...")
|
|
|
|
|
|
continue
|
2026-03-01 16:15:20 -08:00
|
|
|
|
|
2025-09-10 00:51:41 -07:00
|
|
|
|
retry_count += 1
|
2026-01-08 08:57:51 +00:00
|
|
|
|
elapsed_time = time.time() - api_start_time
|
|
|
|
|
|
|
|
|
|
|
|
# Enhanced error logging
|
|
|
|
|
|
error_type = type(api_error).__name__
|
2026-01-10 05:56:26 +00:00
|
|
|
|
error_msg = str(api_error).lower()
|
2026-01-08 08:57:51 +00:00
|
|
|
|
|
|
|
|
|
|
print(f"{self.log_prefix}⚠️ API call failed (attempt {retry_count}/{max_retries}): {error_type}")
|
|
|
|
|
|
print(f"{self.log_prefix} ⏱️ Time elapsed before failure: {elapsed_time:.2f}s")
|
2026-01-10 05:56:26 +00:00
|
|
|
|
print(f"{self.log_prefix} 📝 Error: {str(api_error)[:200]}")
|
2026-01-08 08:57:51 +00:00
|
|
|
|
print(f"{self.log_prefix} 📊 Request context: {len(api_messages)} messages, ~{approx_tokens:,} tokens, {len(self.tools) if self.tools else 0} tools")
|
|
|
|
|
|
|
2026-02-08 10:49:24 +00:00
|
|
|
|
# Check for interrupt before deciding to retry
|
|
|
|
|
|
if self._interrupt_requested:
|
|
|
|
|
|
print(f"{self.log_prefix}⚡ Interrupt detected during error handling, aborting retries.")
|
2026-02-22 17:10:24 -08:00
|
|
|
|
self._persist_session(messages, conversation_history)
|
2026-02-27 03:21:49 -08:00
|
|
|
|
self.clear_interrupt()
|
2026-02-08 10:49:24 +00:00
|
|
|
|
return {
|
2026-03-08 18:58:23 -07:00
|
|
|
|
"final_response": f"Operation interrupted: handling API error ({error_type}: {str(api_error)[:80]}).",
|
2026-02-08 10:49:24 +00:00
|
|
|
|
"messages": messages,
|
|
|
|
|
|
"api_calls": api_call_count,
|
|
|
|
|
|
"completed": False,
|
|
|
|
|
|
"interrupted": True,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-27 12:21:27 -05:00
|
|
|
|
# Check for 413 payload-too-large BEFORE generic 4xx handler.
|
|
|
|
|
|
# A 413 is a payload-size error — the correct response is to
|
|
|
|
|
|
# compress history and retry, not abort immediately.
|
|
|
|
|
|
status_code = getattr(api_error, "status_code", None)
|
|
|
|
|
|
is_payload_too_large = (
|
|
|
|
|
|
status_code == 413
|
|
|
|
|
|
or 'request entity too large' in error_msg
|
2026-02-27 23:04:32 -08:00
|
|
|
|
or 'payload too large' in error_msg
|
2026-02-27 12:21:27 -05:00
|
|
|
|
or 'error code: 413' in error_msg
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
if is_payload_too_large:
|
2026-03-07 09:19:07 -08:00
|
|
|
|
compression_attempts += 1
|
|
|
|
|
|
if compression_attempts > max_compression_attempts:
|
|
|
|
|
|
print(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.")
|
|
|
|
|
|
logging.error(f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
|
|
|
|
|
|
self._persist_session(messages, conversation_history)
|
|
|
|
|
|
return {
|
|
|
|
|
|
"messages": messages,
|
|
|
|
|
|
"completed": False,
|
|
|
|
|
|
"api_calls": api_call_count,
|
|
|
|
|
|
"error": f"Request payload too large: max compression attempts ({max_compression_attempts}) reached.",
|
|
|
|
|
|
"partial": True
|
|
|
|
|
|
}
|
|
|
|
|
|
print(f"{self.log_prefix}⚠️ Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")
|
2026-02-27 12:21:27 -05:00
|
|
|
|
|
|
|
|
|
|
original_len = len(messages)
|
|
|
|
|
|
messages, active_system_prompt = self._compress_context(
|
|
|
|
|
|
messages, system_message, approx_tokens=approx_tokens
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
if len(messages) < original_len:
|
|
|
|
|
|
print(f"{self.log_prefix} 🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
|
2026-03-07 09:19:07 -08:00
|
|
|
|
time.sleep(2) # Brief pause between compression retries
|
2026-03-07 15:13:45 -05:00
|
|
|
|
restart_with_compressed_messages = True
|
|
|
|
|
|
break
|
2026-02-27 12:21:27 -05:00
|
|
|
|
else:
|
|
|
|
|
|
print(f"{self.log_prefix}❌ Payload too large and cannot compress further.")
|
|
|
|
|
|
logging.error(f"{self.log_prefix}413 payload too large. Cannot compress further.")
|
|
|
|
|
|
self._persist_session(messages, conversation_history)
|
|
|
|
|
|
return {
|
|
|
|
|
|
"messages": messages,
|
|
|
|
|
|
"completed": False,
|
|
|
|
|
|
"api_calls": api_call_count,
|
|
|
|
|
|
"error": "Request payload too large (413). Cannot compress further.",
|
|
|
|
|
|
"partial": True
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-04 14:42:41 -08:00
|
|
|
|
# Check for context-length errors BEFORE generic 4xx handler.
|
2026-03-05 01:12:34 +03:00
|
|
|
|
# Local backends (LM Studio, Ollama, llama.cpp) often return
|
|
|
|
|
|
# HTTP 400 with messages like "Context size has been exceeded"
|
|
|
|
|
|
# which must trigger compression, not an immediate abort.
|
2026-01-10 05:56:26 +00:00
|
|
|
|
is_context_length_error = any(phrase in error_msg for phrase in [
|
2026-03-05 01:12:34 +03:00
|
|
|
|
'context length', 'context size', 'maximum context',
|
|
|
|
|
|
'token limit', 'too many tokens', 'reduce the length',
|
|
|
|
|
|
'exceeds the limit', 'context window',
|
2026-02-27 12:21:27 -05:00
|
|
|
|
'request entity too large', # OpenRouter/Nous 413 safety net
|
2026-01-10 05:56:26 +00:00
|
|
|
|
])
|
|
|
|
|
|
|
|
|
|
|
|
if is_context_length_error:
|
2026-03-05 16:09:57 -08:00
|
|
|
|
compressor = self.context_compressor
|
|
|
|
|
|
old_ctx = compressor.context_length
|
|
|
|
|
|
|
|
|
|
|
|
# Try to parse the actual limit from the error message
|
|
|
|
|
|
parsed_limit = parse_context_limit_from_error(error_msg)
|
|
|
|
|
|
if parsed_limit and parsed_limit < old_ctx:
|
|
|
|
|
|
new_ctx = parsed_limit
|
|
|
|
|
|
print(f"{self.log_prefix}⚠️ Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})")
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Step down to the next probe tier
|
|
|
|
|
|
new_ctx = get_next_probe_tier(old_ctx)
|
|
|
|
|
|
|
|
|
|
|
|
if new_ctx and new_ctx < old_ctx:
|
|
|
|
|
|
compressor.context_length = new_ctx
|
|
|
|
|
|
compressor.threshold_tokens = int(new_ctx * compressor.threshold_percent)
|
|
|
|
|
|
compressor._context_probed = True
|
|
|
|
|
|
print(f"{self.log_prefix}⚠️ Context length exceeded — stepping down: {old_ctx:,} → {new_ctx:,} tokens")
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(f"{self.log_prefix}⚠️ Context length exceeded at minimum tier — attempting compression...")
|
2026-03-05 01:12:34 +03:00
|
|
|
|
|
2026-03-07 09:19:07 -08:00
|
|
|
|
compression_attempts += 1
|
|
|
|
|
|
if compression_attempts > max_compression_attempts:
|
|
|
|
|
|
print(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.")
|
|
|
|
|
|
logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
|
|
|
|
|
|
self._persist_session(messages, conversation_history)
|
|
|
|
|
|
return {
|
|
|
|
|
|
"messages": messages,
|
|
|
|
|
|
"completed": False,
|
|
|
|
|
|
"api_calls": api_call_count,
|
|
|
|
|
|
"error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
|
|
|
|
|
|
"partial": True
|
|
|
|
|
|
}
|
|
|
|
|
|
print(f"{self.log_prefix} 🗜️ Context compression attempt {compression_attempts}/{max_compression_attempts}...")
|
|
|
|
|
|
|
2026-02-01 18:01:31 -08:00
|
|
|
|
original_len = len(messages)
|
2026-02-21 04:17:27 -08:00
|
|
|
|
messages, active_system_prompt = self._compress_context(
|
|
|
|
|
|
messages, system_message, approx_tokens=approx_tokens
|
|
|
|
|
|
)
|
2026-03-05 01:12:34 +03:00
|
|
|
|
|
2026-03-05 16:09:57 -08:00
|
|
|
|
if len(messages) < original_len or new_ctx and new_ctx < old_ctx:
|
|
|
|
|
|
if len(messages) < original_len:
|
|
|
|
|
|
print(f"{self.log_prefix} 🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
|
2026-03-07 09:19:07 -08:00
|
|
|
|
time.sleep(2) # Brief pause between compression retries
|
2026-03-07 15:13:45 -05:00
|
|
|
|
restart_with_compressed_messages = True
|
|
|
|
|
|
break
|
2026-02-01 18:01:31 -08:00
|
|
|
|
else:
|
2026-03-05 16:09:57 -08:00
|
|
|
|
# Can't compress further and already at minimum tier
|
2026-02-01 18:01:31 -08:00
|
|
|
|
print(f"{self.log_prefix}❌ Context length exceeded and cannot compress further.")
|
|
|
|
|
|
print(f"{self.log_prefix} 💡 The conversation has accumulated too much content.")
|
|
|
|
|
|
logging.error(f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.")
|
2026-02-22 17:10:24 -08:00
|
|
|
|
self._persist_session(messages, conversation_history)
|
2026-02-01 18:01:31 -08:00
|
|
|
|
return {
|
|
|
|
|
|
"messages": messages,
|
|
|
|
|
|
"completed": False,
|
|
|
|
|
|
"api_calls": api_call_count,
|
|
|
|
|
|
"error": f"Context length exceeded ({approx_tokens:,} tokens). Cannot compress further.",
|
|
|
|
|
|
"partial": True
|
|
|
|
|
|
}
|
2026-03-04 14:42:41 -08:00
|
|
|
|
|
|
|
|
|
|
# Check for non-retryable client errors (4xx HTTP status codes).
|
|
|
|
|
|
# These indicate a problem with the request itself (bad model ID,
|
|
|
|
|
|
# invalid API key, forbidden, etc.) and will never succeed on retry.
|
2026-03-05 01:12:34 +03:00
|
|
|
|
# Note: 413 and context-length errors are excluded — handled above.
|
2026-03-04 14:42:41 -08:00
|
|
|
|
is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413
|
|
|
|
|
|
is_client_error = (is_client_status_error or any(phrase in error_msg for phrase in [
|
2026-03-05 01:12:34 +03:00
|
|
|
|
'error code: 401', 'error code: 403',
|
2026-03-04 14:42:41 -08:00
|
|
|
|
'error code: 404', 'error code: 422',
|
|
|
|
|
|
'is not a valid model', 'invalid model', 'model not found',
|
|
|
|
|
|
'invalid api key', 'invalid_api_key', 'authentication',
|
|
|
|
|
|
'unauthorized', 'forbidden', 'not found',
|
|
|
|
|
|
])) and not is_context_length_error
|
|
|
|
|
|
|
|
|
|
|
|
if is_client_error:
|
feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
2026-03-08 20:22:33 -07:00
|
|
|
|
# Try fallback before aborting — a different provider
|
|
|
|
|
|
# may not have the same issue (rate limit, auth, etc.)
|
|
|
|
|
|
if self._try_activate_fallback():
|
|
|
|
|
|
retry_count = 0
|
|
|
|
|
|
continue
|
2026-03-04 14:42:41 -08:00
|
|
|
|
self._dump_api_request_debug(
|
|
|
|
|
|
api_kwargs, reason="non_retryable_client_error", error=api_error,
|
|
|
|
|
|
)
|
|
|
|
|
|
print(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.")
|
|
|
|
|
|
print(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.")
|
|
|
|
|
|
logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
|
|
|
|
|
|
self._persist_session(messages, conversation_history)
|
|
|
|
|
|
return {
|
|
|
|
|
|
"final_response": None,
|
|
|
|
|
|
"messages": messages,
|
|
|
|
|
|
"api_calls": api_call_count,
|
|
|
|
|
|
"completed": False,
|
|
|
|
|
|
"failed": True,
|
|
|
|
|
|
"error": str(api_error),
|
|
|
|
|
|
}
|
2026-03-05 01:12:34 +03:00
|
|
|
|
|
2026-03-01 02:27:26 +03:00
|
|
|
|
if retry_count >= max_retries:
|
feat: simple fallback model for provider resilience
When the primary model/provider fails after retries (rate limit, overload,
auth errors, connection failures), Hermes automatically switches to a
configured fallback model for the remainder of the session.
Config (in ~/.hermes/config.yaml):
fallback_model:
provider: openrouter
model: anthropic/claude-sonnet-4
Supports all major providers: OpenRouter, OpenAI, Nous, DeepSeek, Together,
Groq, Fireworks, Mistral, Gemini — plus custom endpoints via base_url and
api_key_env overrides.
Design principles:
- Dead simple: one fallback model, not a chain
- One-shot: switches once, doesn't ping-pong back
- Zero new dependencies: uses existing OpenAI client
- Minimal code: ~100 lines in run_agent.py, ~5 lines in cli.py/gateway
- Three trigger points: max retries exhausted, non-retryable client errors,
and invalid response exhaustion
Does NOT trigger on context overflow or payload-too-large errors (those
are handled by the existing compression system).
Addresses #737.
25 new tests, 2492 total passing.
2026-03-08 20:22:33 -07:00
|
|
|
|
# Try fallback before giving up entirely
|
|
|
|
|
|
if self._try_activate_fallback():
|
|
|
|
|
|
retry_count = 0
|
|
|
|
|
|
continue
|
2026-01-08 08:57:51 +00:00
|
|
|
|
print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.")
|
|
|
|
|
|
logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")
|
|
|
|
|
|
logging.error(f"{self.log_prefix}Request details - Messages: {len(api_messages)}, Approx tokens: {approx_tokens:,}")
|
2025-09-10 00:51:41 -07:00
|
|
|
|
raise api_error
|
2025-11-17 18:35:36 -05:00
|
|
|
|
|
|
|
|
|
|
wait_time = min(2 ** retry_count, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s, 60s
|
2025-09-10 00:51:41 -07:00
|
|
|
|
logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}")
|
2026-02-27 17:35:26 -08:00
|
|
|
|
if retry_count >= max_retries:
|
|
|
|
|
|
print(f"{self.log_prefix}⚠️ API call failed after {retry_count} attempts: {str(api_error)[:100]}")
|
|
|
|
|
|
print(f"{self.log_prefix}⏳ Final retry in {wait_time}s...")
|
2026-02-08 10:49:24 +00:00
|
|
|
|
|
|
|
|
|
|
# Sleep in small increments so we can respond to interrupts quickly
|
|
|
|
|
|
# instead of blocking the entire wait_time in one sleep() call
|
|
|
|
|
|
sleep_end = time.time() + wait_time
|
|
|
|
|
|
while time.time() < sleep_end:
|
|
|
|
|
|
if self._interrupt_requested:
|
|
|
|
|
|
print(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
|
2026-02-22 17:10:24 -08:00
|
|
|
|
self._persist_session(messages, conversation_history)
|
2026-02-27 03:21:49 -08:00
|
|
|
|
self.clear_interrupt()
|
2026-02-08 10:49:24 +00:00
|
|
|
|
return {
|
2026-03-08 18:58:23 -07:00
|
|
|
|
"final_response": f"Operation interrupted: retrying API call after error (retry {retry_count}/{max_retries}).",
|
2026-02-08 10:49:24 +00:00
|
|
|
|
"messages": messages,
|
|
|
|
|
|
"api_calls": api_call_count,
|
|
|
|
|
|
"completed": False,
|
|
|
|
|
|
"interrupted": True,
|
|
|
|
|
|
}
|
|
|
|
|
|
time.sleep(0.2) # Check interrupt every 200ms
|
2025-09-10 00:51:41 -07:00
|
|
|
|
|
2026-02-21 12:15:40 -08:00
|
|
|
|
# If the API call was interrupted, skip response processing
|
|
|
|
|
|
if interrupted:
|
|
|
|
|
|
break
|
|
|
|
|
|
|
2026-03-07 15:13:45 -05:00
|
|
|
|
if restart_with_compressed_messages:
|
|
|
|
|
|
api_call_count -= 1
|
|
|
|
|
|
self.iteration_budget.refund()
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
2026-03-07 18:45:17 -05:00
|
|
|
|
if restart_with_length_continuation:
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
2026-03-07 09:19:07 -08:00
|
|
|
|
# Guard: if all retries exhausted without a successful response
|
|
|
|
|
|
# (e.g. repeated context-length errors that exhausted retry_count),
|
|
|
|
|
|
# the `response` variable is still None. Break out cleanly.
|
|
|
|
|
|
if response is None:
|
|
|
|
|
|
print(f"{self.log_prefix}❌ All API retries exhausted with no successful response.")
|
|
|
|
|
|
self._persist_session(messages, conversation_history)
|
|
|
|
|
|
break
|
|
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
try:
|
2026-02-25 18:20:38 -08:00
|
|
|
|
if self.api_mode == "codex_responses":
|
|
|
|
|
|
assistant_message, finish_reason = self._normalize_codex_response(response)
|
|
|
|
|
|
else:
|
|
|
|
|
|
assistant_message = response.choices[0].message
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
2026-03-09 03:32:32 -07:00
|
|
|
|
# Normalize content to string — some OpenAI-compatible servers
|
|
|
|
|
|
# (llama-server, etc.) return content as a dict or list instead
|
|
|
|
|
|
# of a plain string, which crashes downstream .strip() calls.
|
|
|
|
|
|
if assistant_message.content is not None and not isinstance(assistant_message.content, str):
|
|
|
|
|
|
raw = assistant_message.content
|
|
|
|
|
|
if isinstance(raw, dict):
|
|
|
|
|
|
assistant_message.content = raw.get("text", "") or raw.get("content", "") or json.dumps(raw)
|
|
|
|
|
|
elif isinstance(raw, list):
|
|
|
|
|
|
# Multimodal content list — extract text parts
|
|
|
|
|
|
parts = []
|
|
|
|
|
|
for part in raw:
|
|
|
|
|
|
if isinstance(part, str):
|
|
|
|
|
|
parts.append(part)
|
|
|
|
|
|
elif isinstance(part, dict) and part.get("type") == "text":
|
|
|
|
|
|
parts.append(part.get("text", ""))
|
|
|
|
|
|
elif isinstance(part, dict) and "text" in part:
|
|
|
|
|
|
parts.append(str(part["text"]))
|
|
|
|
|
|
assistant_message.content = "\n".join(parts)
|
|
|
|
|
|
else:
|
|
|
|
|
|
assistant_message.content = str(raw)
|
|
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
# Handle assistant response
|
2026-01-31 06:30:48 +00:00
|
|
|
|
if assistant_message.content and not self.quiet_mode:
|
2026-01-08 08:57:51 +00:00
|
|
|
|
print(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")
|
feat(gateway): expose subagent tool calls and thinking to user (fixes #169) (#186)
When subagents run via delegate_task, the user now sees real-time
progress instead of silence:
CLI: tree-view activity lines print above the delegation spinner
🔀 Delegating: research quantum computing
├─ 💭 "I'll search for papers first..."
├─ 🔍 web_search "quantum computing"
├─ 📖 read_file "paper.pdf"
└─ ⠹ working... (18.2s)
Gateway (Telegram/Discord): batched progress summaries sent every
5 tool calls to avoid message spam. Remaining tools flushed on
subagent completion.
Changes:
- agent/display.py: add KawaiiSpinner.print_above() to print
status lines above an active spinner without disrupting animation.
Uses captured stdout (self._out) so it works inside the child's
redirect_stdout(devnull).
- tools/delegate_tool.py: add _build_child_progress_callback()
that creates a per-child callback relaying tool calls and
thinking events to the parent's spinner (CLI) or progress
queue (gateway). Each child gets its own callback instance,
so parallel subagents don't share state. Includes _flush()
for gateway batch completion.
- run_agent.py: fire tool_progress_callback with '_thinking'
event when the model produces text content. Guarded by
_delegate_depth > 0 so only subagents fire this (prevents
gateway spam from main agent). REASONING_SCRATCHPAD/think/
reasoning XML tags are stripped before display.
Tests: 21 new tests covering print_above, callback builder,
thinking relay, SCRATCHPAD filtering, batching, flush, thread
isolation, delegate_depth guard, and prefix handling.
2026-03-01 10:18:00 +03:00
|
|
|
|
|
|
|
|
|
|
# Notify progress callback of model's thinking (used by subagent
|
|
|
|
|
|
# delegation to relay the child's reasoning to the parent display).
|
|
|
|
|
|
# Guard: only fire for subagents (_delegate_depth >= 1) to avoid
|
|
|
|
|
|
# spamming gateway platforms with the main agent's every thought.
|
|
|
|
|
|
if (assistant_message.content and self.tool_progress_callback
|
|
|
|
|
|
and getattr(self, '_delegate_depth', 0) > 0):
|
|
|
|
|
|
_think_text = assistant_message.content.strip()
|
|
|
|
|
|
# Strip reasoning XML tags that shouldn't leak to parent display
|
|
|
|
|
|
_think_text = re.sub(
|
|
|
|
|
|
r'</?(?:REASONING_SCRATCHPAD|think|reasoning)>', '', _think_text
|
|
|
|
|
|
).strip()
|
|
|
|
|
|
first_line = _think_text.split('\n')[0][:80] if _think_text else ""
|
|
|
|
|
|
if first_line:
|
|
|
|
|
|
try:
|
|
|
|
|
|
self.tool_progress_callback("_thinking", first_line)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
2026-02-08 20:19:14 +00:00
|
|
|
|
# Check for incomplete <REASONING_SCRATCHPAD> (opened but never closed)
|
|
|
|
|
|
# This means the model ran out of output tokens mid-reasoning — retry up to 2 times
|
2026-02-23 09:55:09 -08:00
|
|
|
|
if has_incomplete_scratchpad(assistant_message.content or ""):
|
2026-02-08 20:19:14 +00:00
|
|
|
|
if not hasattr(self, '_incomplete_scratchpad_retries'):
|
|
|
|
|
|
self._incomplete_scratchpad_retries = 0
|
|
|
|
|
|
self._incomplete_scratchpad_retries += 1
|
|
|
|
|
|
|
|
|
|
|
|
print(f"{self.log_prefix}⚠️ Incomplete <REASONING_SCRATCHPAD> detected (opened but never closed)")
|
|
|
|
|
|
|
|
|
|
|
|
if self._incomplete_scratchpad_retries <= 2:
|
|
|
|
|
|
print(f"{self.log_prefix}🔄 Retrying API call ({self._incomplete_scratchpad_retries}/2)...")
|
|
|
|
|
|
# Don't add the broken message, just retry
|
|
|
|
|
|
continue
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Max retries - discard this turn and save as partial
|
|
|
|
|
|
print(f"{self.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.")
|
|
|
|
|
|
self._incomplete_scratchpad_retries = 0
|
|
|
|
|
|
|
|
|
|
|
|
rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
|
2026-02-20 23:23:32 -08:00
|
|
|
|
self._cleanup_task_resources(effective_task_id)
|
2026-02-22 17:10:24 -08:00
|
|
|
|
self._persist_session(messages, conversation_history)
|
2026-02-08 20:19:14 +00:00
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
|
"final_response": None,
|
|
|
|
|
|
"messages": rolled_back_messages,
|
|
|
|
|
|
"api_calls": api_call_count,
|
|
|
|
|
|
"completed": False,
|
|
|
|
|
|
"partial": True,
|
|
|
|
|
|
"error": "Incomplete REASONING_SCRATCHPAD after 2 retries"
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# Reset incomplete scratchpad counter on clean response
|
|
|
|
|
|
if hasattr(self, '_incomplete_scratchpad_retries'):
|
|
|
|
|
|
self._incomplete_scratchpad_retries = 0
|
2026-02-25 18:20:38 -08:00
|
|
|
|
|
|
|
|
|
|
if self.api_mode == "codex_responses" and finish_reason == "incomplete":
|
|
|
|
|
|
if not hasattr(self, "_codex_incomplete_retries"):
|
|
|
|
|
|
self._codex_incomplete_retries = 0
|
|
|
|
|
|
self._codex_incomplete_retries += 1
|
|
|
|
|
|
|
|
|
|
|
|
interim_msg = self._build_assistant_message(assistant_message, finish_reason)
|
2026-03-02 02:23:53 -08:00
|
|
|
|
interim_has_content = bool((interim_msg.get("content") or "").strip())
|
2026-02-25 18:20:38 -08:00
|
|
|
|
interim_has_reasoning = bool(interim_msg.get("reasoning", "").strip()) if isinstance(interim_msg.get("reasoning"), str) else False
|
|
|
|
|
|
|
|
|
|
|
|
if interim_has_content or interim_has_reasoning:
|
|
|
|
|
|
last_msg = messages[-1] if messages else None
|
|
|
|
|
|
duplicate_interim = (
|
|
|
|
|
|
isinstance(last_msg, dict)
|
|
|
|
|
|
and last_msg.get("role") == "assistant"
|
|
|
|
|
|
and last_msg.get("finish_reason") == "incomplete"
|
|
|
|
|
|
and (last_msg.get("content") or "") == (interim_msg.get("content") or "")
|
|
|
|
|
|
and (last_msg.get("reasoning") or "") == (interim_msg.get("reasoning") or "")
|
|
|
|
|
|
)
|
|
|
|
|
|
if not duplicate_interim:
|
|
|
|
|
|
messages.append(interim_msg)
|
|
|
|
|
|
self._log_msg_to_db(interim_msg)
|
|
|
|
|
|
|
|
|
|
|
|
if self._codex_incomplete_retries < 3:
|
|
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
print(f"{self.log_prefix}↻ Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)")
|
|
|
|
|
|
self._session_messages = messages
|
|
|
|
|
|
self._save_session_log(messages)
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
self._codex_incomplete_retries = 0
|
|
|
|
|
|
self._persist_session(messages, conversation_history)
|
|
|
|
|
|
return {
|
|
|
|
|
|
"final_response": None,
|
|
|
|
|
|
"messages": messages,
|
|
|
|
|
|
"api_calls": api_call_count,
|
|
|
|
|
|
"completed": False,
|
|
|
|
|
|
"partial": True,
|
|
|
|
|
|
"error": "Codex response remained incomplete after 3 continuation attempts",
|
|
|
|
|
|
}
|
|
|
|
|
|
elif hasattr(self, "_codex_incomplete_retries"):
|
|
|
|
|
|
self._codex_incomplete_retries = 0
|
2026-02-08 20:19:14 +00:00
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
# Check for tool calls
|
|
|
|
|
|
if assistant_message.tool_calls:
|
2026-01-31 06:30:48 +00:00
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
print(f"{self.log_prefix}🔧 Processing {len(assistant_message.tool_calls)} tool call(s)...")
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
2025-09-10 00:51:41 -07:00
|
|
|
|
if self.verbose_logging:
|
|
|
|
|
|
for tc in assistant_message.tool_calls:
|
|
|
|
|
|
logging.debug(f"Tool call: {tc.function.name} with args: {tc.function.arguments[:200]}...")
|
|
|
|
|
|
|
2026-01-10 05:56:26 +00:00
|
|
|
|
# Validate tool call names - detect model hallucinations
|
|
|
|
|
|
invalid_tool_calls = [
|
|
|
|
|
|
tc.function.name for tc in assistant_message.tool_calls
|
|
|
|
|
|
if tc.function.name not in self.valid_tool_names
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
if invalid_tool_calls:
|
|
|
|
|
|
# Track retries for invalid tool calls
|
|
|
|
|
|
if not hasattr(self, '_invalid_tool_retries'):
|
|
|
|
|
|
self._invalid_tool_retries = 0
|
|
|
|
|
|
self._invalid_tool_retries += 1
|
|
|
|
|
|
|
|
|
|
|
|
invalid_preview = invalid_tool_calls[0][:80] + "..." if len(invalid_tool_calls[0]) > 80 else invalid_tool_calls[0]
|
|
|
|
|
|
print(f"{self.log_prefix}⚠️ Invalid tool call detected: '{invalid_preview}'")
|
|
|
|
|
|
print(f"{self.log_prefix} Valid tools: {sorted(self.valid_tool_names)}")
|
|
|
|
|
|
|
|
|
|
|
|
if self._invalid_tool_retries < 3:
|
|
|
|
|
|
print(f"{self.log_prefix}🔄 Retrying API call ({self._invalid_tool_retries}/3)...")
|
|
|
|
|
|
# Don't add anything to messages, just retry the API call
|
|
|
|
|
|
continue
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(f"{self.log_prefix}❌ Max retries (3) for invalid tool calls exceeded. Stopping as partial.")
|
|
|
|
|
|
# Return partial result - don't include the bad tool call in messages
|
2026-02-21 00:05:39 -08:00
|
|
|
|
self._invalid_tool_retries = 0
|
2026-02-22 17:10:24 -08:00
|
|
|
|
self._persist_session(messages, conversation_history)
|
2026-01-10 05:56:26 +00:00
|
|
|
|
return {
|
|
|
|
|
|
"final_response": None,
|
2026-02-21 00:05:39 -08:00
|
|
|
|
"messages": messages,
|
2026-01-10 05:56:26 +00:00
|
|
|
|
"api_calls": api_call_count,
|
|
|
|
|
|
"completed": False,
|
|
|
|
|
|
"partial": True,
|
|
|
|
|
|
"error": f"Model generated invalid tool call: {invalid_preview}"
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# Reset retry counter on successful tool call validation
|
|
|
|
|
|
if hasattr(self, '_invalid_tool_retries'):
|
|
|
|
|
|
self._invalid_tool_retries = 0
|
|
|
|
|
|
|
2026-01-10 13:04:43 +00:00
|
|
|
|
# Validate tool call arguments are valid JSON
|
2026-02-04 13:57:59 -08:00
|
|
|
|
# Handle empty strings as empty objects (common model quirk)
|
2026-01-10 13:04:43 +00:00
|
|
|
|
invalid_json_args = []
|
|
|
|
|
|
for tc in assistant_message.tool_calls:
|
2026-02-04 13:57:59 -08:00
|
|
|
|
args = tc.function.arguments
|
|
|
|
|
|
# Treat empty/whitespace strings as empty object
|
|
|
|
|
|
if not args or not args.strip():
|
|
|
|
|
|
tc.function.arguments = "{}"
|
|
|
|
|
|
continue
|
2026-01-10 13:04:43 +00:00
|
|
|
|
try:
|
2026-02-04 13:57:59 -08:00
|
|
|
|
json.loads(args)
|
2026-01-10 13:04:43 +00:00
|
|
|
|
except json.JSONDecodeError as e:
|
|
|
|
|
|
invalid_json_args.append((tc.function.name, str(e)))
|
|
|
|
|
|
|
|
|
|
|
|
if invalid_json_args:
|
|
|
|
|
|
# Track retries for invalid JSON arguments
|
|
|
|
|
|
self._invalid_json_retries += 1
|
|
|
|
|
|
|
|
|
|
|
|
tool_name, error_msg = invalid_json_args[0]
|
|
|
|
|
|
print(f"{self.log_prefix}⚠️ Invalid JSON in tool call arguments for '{tool_name}': {error_msg}")
|
|
|
|
|
|
|
|
|
|
|
|
if self._invalid_json_retries < 3:
|
|
|
|
|
|
print(f"{self.log_prefix}🔄 Retrying API call ({self._invalid_json_retries}/3)...")
|
|
|
|
|
|
# Don't add anything to messages, just retry the API call
|
|
|
|
|
|
continue
|
|
|
|
|
|
else:
|
2026-02-03 15:26:59 -08:00
|
|
|
|
# Instead of returning partial, inject a helpful message and let model recover
|
|
|
|
|
|
print(f"{self.log_prefix}⚠️ Injecting recovery message for invalid JSON...")
|
|
|
|
|
|
self._invalid_json_retries = 0 # Reset for next attempt
|
|
|
|
|
|
|
|
|
|
|
|
# Add a user message explaining the issue
|
|
|
|
|
|
recovery_msg = (
|
|
|
|
|
|
f"Your tool call to '{tool_name}' had invalid JSON arguments. "
|
|
|
|
|
|
f"Error: {error_msg}. "
|
|
|
|
|
|
f"For tools with no required parameters, use an empty object: {{}}. "
|
|
|
|
|
|
f"Please either retry the tool call with valid JSON, or respond without using that tool."
|
|
|
|
|
|
)
|
2026-02-22 17:10:24 -08:00
|
|
|
|
recovery_dict = {"role": "user", "content": recovery_msg}
|
|
|
|
|
|
messages.append(recovery_dict)
|
|
|
|
|
|
self._log_msg_to_db(recovery_dict)
|
2026-02-03 15:26:59 -08:00
|
|
|
|
continue
|
2026-01-10 13:04:43 +00:00
|
|
|
|
|
|
|
|
|
|
# Reset retry counter on successful JSON validation
|
|
|
|
|
|
self._invalid_json_retries = 0
|
|
|
|
|
|
|
2026-02-21 04:17:27 -08:00
|
|
|
|
assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
|
2026-02-01 22:48:18 -08:00
|
|
|
|
|
2026-02-22 21:33:28 -08:00
|
|
|
|
# If this turn has both content AND tool_calls, capture the content
|
|
|
|
|
|
# as a fallback final response. Common pattern: model delivers its
|
|
|
|
|
|
# answer and calls memory/skill tools as a side-effect in the same
|
|
|
|
|
|
# turn. If the follow-up turn after tools is empty, we use this.
|
|
|
|
|
|
turn_content = assistant_message.content or ""
|
|
|
|
|
|
if turn_content and self._has_content_after_think_block(turn_content):
|
|
|
|
|
|
self._last_content_with_tools = turn_content
|
|
|
|
|
|
# Show intermediate commentary so the user can follow along
|
|
|
|
|
|
if self.quiet_mode:
|
|
|
|
|
|
clean = self._strip_think_blocks(turn_content).strip()
|
|
|
|
|
|
if clean:
|
2026-03-02 00:32:06 -08:00
|
|
|
|
print(f" ┊ 💬 {clean}")
|
2026-02-22 21:33:28 -08:00
|
|
|
|
|
2026-01-29 06:10:24 +00:00
|
|
|
|
messages.append(assistant_msg)
|
2026-02-22 17:10:24 -08:00
|
|
|
|
self._log_msg_to_db(assistant_msg)
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
2026-02-21 04:17:27 -08:00
|
|
|
|
self._execute_tool_calls(assistant_message, messages, effective_task_id)
|
2026-03-07 08:16:37 -08:00
|
|
|
|
|
|
|
|
|
|
# Refund the iteration if the ONLY tool(s) called were
|
|
|
|
|
|
# execute_code (programmatic tool calling). These are
|
|
|
|
|
|
# cheap RPC-style calls that shouldn't eat the budget.
|
|
|
|
|
|
_tc_names = {tc.function.name for tc in assistant_message.tool_calls}
|
|
|
|
|
|
if _tc_names == {"execute_code"}:
|
|
|
|
|
|
self.iteration_budget.refund()
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
2026-02-01 18:01:31 -08:00
|
|
|
|
if self.compression_enabled and self.context_compressor.should_compress():
|
2026-02-21 04:17:27 -08:00
|
|
|
|
messages, active_system_prompt = self._compress_context(
|
|
|
|
|
|
messages, system_message,
|
|
|
|
|
|
approx_tokens=self.context_compressor.last_prompt_tokens
|
2026-02-01 18:01:31 -08:00
|
|
|
|
)
|
|
|
|
|
|
|
2026-02-01 22:48:18 -08:00
|
|
|
|
# Save session log incrementally (so progress is visible even if interrupted)
|
|
|
|
|
|
self._session_messages = messages
|
|
|
|
|
|
self._save_session_log(messages)
|
|
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
# Continue loop for next response
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
# No tool calls - this is the final response
|
|
|
|
|
|
final_response = assistant_message.content or ""
|
|
|
|
|
|
|
2026-01-10 13:04:43 +00:00
|
|
|
|
# Check if response only has think block with no actual content after it
|
|
|
|
|
|
if not self._has_content_after_think_block(final_response):
|
2026-03-06 16:54:31 -08:00
|
|
|
|
# If the previous turn already delivered real content alongside
|
|
|
|
|
|
# tool calls (e.g. "You're welcome!" + memory save), the model
|
|
|
|
|
|
# has nothing more to say. Use the earlier content immediately
|
|
|
|
|
|
# instead of wasting API calls on retries that won't help.
|
|
|
|
|
|
fallback = getattr(self, '_last_content_with_tools', None)
|
|
|
|
|
|
if fallback:
|
|
|
|
|
|
logger.debug("Empty follow-up after tool calls — using prior turn content as final response")
|
|
|
|
|
|
self._last_content_with_tools = None
|
|
|
|
|
|
self._empty_content_retries = 0
|
|
|
|
|
|
for i in range(len(messages) - 1, -1, -1):
|
|
|
|
|
|
msg = messages[i]
|
|
|
|
|
|
if msg.get("role") == "assistant" and msg.get("tool_calls"):
|
|
|
|
|
|
tool_names = []
|
|
|
|
|
|
for tc in msg["tool_calls"]:
|
|
|
|
|
|
fn = tc.get("function", {})
|
|
|
|
|
|
tool_names.append(fn.get("name", "unknown"))
|
|
|
|
|
|
msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
|
|
|
|
|
|
break
|
|
|
|
|
|
final_response = self._strip_think_blocks(fallback).strip()
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
# No fallback available — this is a genuine empty response.
|
|
|
|
|
|
# Retry in case the model just had a bad generation.
|
2026-01-10 13:04:43 +00:00
|
|
|
|
if not hasattr(self, '_empty_content_retries'):
|
|
|
|
|
|
self._empty_content_retries = 0
|
|
|
|
|
|
self._empty_content_retries += 1
|
|
|
|
|
|
|
2026-02-21 00:05:39 -08:00
|
|
|
|
reasoning_text = self._extract_reasoning(assistant_message)
|
2026-01-10 13:04:43 +00:00
|
|
|
|
print(f"{self.log_prefix}⚠️ Response only contains think block with no content after it")
|
2026-02-21 00:05:39 -08:00
|
|
|
|
if reasoning_text:
|
|
|
|
|
|
reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
|
|
|
|
|
|
print(f"{self.log_prefix} Reasoning: {reasoning_preview}")
|
|
|
|
|
|
else:
|
|
|
|
|
|
content_preview = final_response[:80] + "..." if len(final_response) > 80 else final_response
|
|
|
|
|
|
print(f"{self.log_prefix} Content: '{content_preview}'")
|
2026-01-10 13:04:43 +00:00
|
|
|
|
|
|
|
|
|
|
if self._empty_content_retries < 3:
|
|
|
|
|
|
print(f"{self.log_prefix}🔄 Retrying API call ({self._empty_content_retries}/3)...")
|
|
|
|
|
|
continue
|
|
|
|
|
|
else:
|
2026-02-21 00:05:39 -08:00
|
|
|
|
print(f"{self.log_prefix}❌ Max retries (3) for empty content exceeded.")
|
|
|
|
|
|
self._empty_content_retries = 0
|
|
|
|
|
|
|
2026-02-22 21:33:28 -08:00
|
|
|
|
# If a prior tool_calls turn had real content, salvage it:
|
|
|
|
|
|
# rewrite that turn's content to a brief tool description,
|
|
|
|
|
|
# and use the original content as the final response here.
|
|
|
|
|
|
fallback = getattr(self, '_last_content_with_tools', None)
|
|
|
|
|
|
if fallback:
|
|
|
|
|
|
self._last_content_with_tools = None
|
|
|
|
|
|
# Find the last assistant message with tool_calls and rewrite it
|
|
|
|
|
|
for i in range(len(messages) - 1, -1, -1):
|
|
|
|
|
|
msg = messages[i]
|
|
|
|
|
|
if msg.get("role") == "assistant" and msg.get("tool_calls"):
|
|
|
|
|
|
tool_names = []
|
|
|
|
|
|
for tc in msg["tool_calls"]:
|
|
|
|
|
|
fn = tc.get("function", {})
|
|
|
|
|
|
tool_names.append(fn.get("name", "unknown"))
|
|
|
|
|
|
msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
|
|
|
|
|
|
break
|
2026-02-28 03:06:20 -05:00
|
|
|
|
# Strip <think> blocks from fallback content for user display
|
|
|
|
|
|
final_response = self._strip_think_blocks(fallback).strip()
|
2026-02-22 21:33:28 -08:00
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
# No fallback -- append the empty message as-is
|
2026-02-21 00:05:39 -08:00
|
|
|
|
empty_msg = {
|
|
|
|
|
|
"role": "assistant",
|
|
|
|
|
|
"content": final_response,
|
|
|
|
|
|
"reasoning": reasoning_text,
|
|
|
|
|
|
"finish_reason": finish_reason,
|
|
|
|
|
|
}
|
|
|
|
|
|
messages.append(empty_msg)
|
2026-02-22 17:10:24 -08:00
|
|
|
|
self._log_msg_to_db(empty_msg)
|
2026-01-10 13:04:43 +00:00
|
|
|
|
|
2026-02-20 23:23:32 -08:00
|
|
|
|
self._cleanup_task_resources(effective_task_id)
|
2026-02-22 17:10:24 -08:00
|
|
|
|
self._persist_session(messages, conversation_history)
|
2026-01-10 13:04:43 +00:00
|
|
|
|
|
|
|
|
|
|
return {
|
2026-02-21 00:05:39 -08:00
|
|
|
|
"final_response": final_response or None,
|
|
|
|
|
|
"messages": messages,
|
2026-01-10 13:04:43 +00:00
|
|
|
|
"api_calls": api_call_count,
|
|
|
|
|
|
"completed": False,
|
|
|
|
|
|
"partial": True,
|
|
|
|
|
|
"error": "Model generated only think blocks with no actual response after 3 retries"
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# Reset retry counter on successful content
|
|
|
|
|
|
if hasattr(self, '_empty_content_retries'):
|
|
|
|
|
|
self._empty_content_retries = 0
|
2026-02-25 18:11:26 -08:00
|
|
|
|
|
|
|
|
|
|
if (
|
|
|
|
|
|
self.api_mode == "codex_responses"
|
|
|
|
|
|
and self.valid_tool_names
|
|
|
|
|
|
and codex_ack_continuations < 2
|
|
|
|
|
|
and self._looks_like_codex_intermediate_ack(
|
|
|
|
|
|
user_message=user_message,
|
|
|
|
|
|
assistant_content=final_response,
|
|
|
|
|
|
messages=messages,
|
|
|
|
|
|
)
|
|
|
|
|
|
):
|
|
|
|
|
|
codex_ack_continuations += 1
|
|
|
|
|
|
interim_msg = self._build_assistant_message(assistant_message, "incomplete")
|
|
|
|
|
|
messages.append(interim_msg)
|
|
|
|
|
|
self._log_msg_to_db(interim_msg)
|
|
|
|
|
|
|
|
|
|
|
|
continue_msg = {
|
|
|
|
|
|
"role": "user",
|
|
|
|
|
|
"content": (
|
|
|
|
|
|
"[System: Continue now. Execute the required tool calls and only "
|
|
|
|
|
|
"send your final answer after completing the task.]"
|
|
|
|
|
|
),
|
|
|
|
|
|
}
|
|
|
|
|
|
messages.append(continue_msg)
|
|
|
|
|
|
self._log_msg_to_db(continue_msg)
|
|
|
|
|
|
self._session_messages = messages
|
|
|
|
|
|
self._save_session_log(messages)
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
codex_ack_continuations = 0
|
2026-03-07 18:45:17 -05:00
|
|
|
|
|
|
|
|
|
|
if truncated_response_prefix:
|
|
|
|
|
|
final_response = truncated_response_prefix + final_response
|
2026-01-10 13:04:43 +00:00
|
|
|
|
|
2026-02-28 03:06:20 -05:00
|
|
|
|
# Strip <think> blocks from user-facing response (keep raw in messages for trajectory)
|
|
|
|
|
|
final_response = self._strip_think_blocks(final_response).strip()
|
|
|
|
|
|
|
2026-02-21 04:17:27 -08:00
|
|
|
|
final_msg = self._build_assistant_message(assistant_message, finish_reason)
|
2026-02-01 22:48:18 -08:00
|
|
|
|
|
2026-01-29 06:10:24 +00:00
|
|
|
|
messages.append(final_msg)
|
2026-02-22 17:10:24 -08:00
|
|
|
|
self._log_msg_to_db(final_msg)
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
2026-01-31 06:30:48 +00:00
|
|
|
|
if not self.quiet_mode:
|
|
|
|
|
|
print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
|
2025-09-10 00:43:55 -07:00
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
2025-11-17 18:35:36 -05:00
|
|
|
|
error_msg = f"Error during OpenAI-compatible API call #{api_call_count}: {str(e)}"
|
2025-09-10 00:43:55 -07:00
|
|
|
|
print(f"❌ {error_msg}")
|
|
|
|
|
|
|
2025-09-10 00:51:41 -07:00
|
|
|
|
if self.verbose_logging:
|
|
|
|
|
|
logging.exception("Detailed error information:")
|
|
|
|
|
|
|
2026-02-22 02:16:11 -08:00
|
|
|
|
# If an assistant message with tool_calls was already appended,
|
|
|
|
|
|
# the API expects a role="tool" result for every tool_call_id.
|
|
|
|
|
|
# Fill in error results for any that weren't answered yet.
|
|
|
|
|
|
pending_handled = False
|
|
|
|
|
|
for idx in range(len(messages) - 1, -1, -1):
|
|
|
|
|
|
msg = messages[idx]
|
|
|
|
|
|
if not isinstance(msg, dict):
|
|
|
|
|
|
break
|
|
|
|
|
|
if msg.get("role") == "tool":
|
|
|
|
|
|
continue
|
|
|
|
|
|
if msg.get("role") == "assistant" and msg.get("tool_calls"):
|
|
|
|
|
|
answered_ids = {
|
|
|
|
|
|
m["tool_call_id"]
|
|
|
|
|
|
for m in messages[idx + 1:]
|
|
|
|
|
|
if isinstance(m, dict) and m.get("role") == "tool"
|
|
|
|
|
|
}
|
|
|
|
|
|
for tc in msg["tool_calls"]:
|
|
|
|
|
|
if tc["id"] not in answered_ids:
|
2026-02-22 17:10:24 -08:00
|
|
|
|
err_msg = {
|
2026-02-22 02:16:11 -08:00
|
|
|
|
"role": "tool",
|
|
|
|
|
|
"tool_call_id": tc["id"],
|
|
|
|
|
|
"content": f"Error executing tool: {error_msg}",
|
2026-02-22 17:10:24 -08:00
|
|
|
|
}
|
|
|
|
|
|
messages.append(err_msg)
|
|
|
|
|
|
self._log_msg_to_db(err_msg)
|
2026-02-22 02:16:11 -08:00
|
|
|
|
pending_handled = True
|
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
if not pending_handled:
|
|
|
|
|
|
# Error happened before tool processing (e.g. response parsing).
|
|
|
|
|
|
# Use a user-role message so the model can see what went wrong
|
|
|
|
|
|
# without confusing the API with a fabricated assistant turn.
|
2026-02-22 17:10:24 -08:00
|
|
|
|
sys_err_msg = {
|
2026-02-22 02:16:11 -08:00
|
|
|
|
"role": "user",
|
|
|
|
|
|
"content": f"[System error during processing: {error_msg}]",
|
2026-02-22 17:10:24 -08:00
|
|
|
|
}
|
|
|
|
|
|
messages.append(sys_err_msg)
|
|
|
|
|
|
self._log_msg_to_db(sys_err_msg)
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
|
|
|
|
|
# If we're near the limit, break to avoid infinite loops
|
|
|
|
|
|
if api_call_count >= self.max_iterations - 1:
|
|
|
|
|
|
final_response = f"I apologize, but I encountered repeated errors: {error_msg}"
|
|
|
|
|
|
break
|
|
|
|
|
|
|
2026-03-07 10:32:51 -08:00
|
|
|
|
if final_response is None and (
|
|
|
|
|
|
api_call_count >= self.max_iterations
|
|
|
|
|
|
or self.iteration_budget.remaining <= 0
|
|
|
|
|
|
):
|
|
|
|
|
|
if self.iteration_budget.remaining <= 0 and not self.quiet_mode:
|
|
|
|
|
|
print(f"\n⚠️ Session iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} used, including subagents)")
|
2026-02-21 04:17:27 -08:00
|
|
|
|
final_response = self._handle_max_iterations(messages, api_call_count)
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
|
|
|
|
|
# Determine if conversation completed successfully
|
|
|
|
|
|
completed = final_response is not None and api_call_count < self.max_iterations
|
2025-11-04 03:32:43 -05:00
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
# Save trajectory if enabled
|
|
|
|
|
|
self._save_trajectory(messages, user_message, completed)
|
2025-11-04 03:32:43 -05:00
|
|
|
|
|
2026-01-29 06:10:24 +00:00
|
|
|
|
# Clean up VM and browser for this task after conversation completes
|
2026-02-20 23:23:32 -08:00
|
|
|
|
self._cleanup_task_resources(effective_task_id)
|
2025-11-04 03:32:43 -05:00
|
|
|
|
|
2026-02-22 17:10:24 -08:00
|
|
|
|
# Persist session to both JSON log and SQLite
|
|
|
|
|
|
self._persist_session(messages, conversation_history)
|
2026-02-25 19:34:25 -05:00
|
|
|
|
|
|
|
|
|
|
# Sync conversation to Honcho for user modeling
|
|
|
|
|
|
if final_response and not interrupted:
|
|
|
|
|
|
self._honcho_sync(original_user_message, final_response)
|
|
|
|
|
|
|
2026-02-03 16:15:49 -08:00
|
|
|
|
# Build result with interrupt info if applicable
|
|
|
|
|
|
result = {
|
2025-09-10 00:43:55 -07:00
|
|
|
|
"final_response": final_response,
|
|
|
|
|
|
"messages": messages,
|
|
|
|
|
|
"api_calls": api_call_count,
|
2026-01-10 05:56:26 +00:00
|
|
|
|
"completed": completed,
|
2026-02-03 16:15:49 -08:00
|
|
|
|
"partial": False, # True only when stopped due to invalid tool calls
|
|
|
|
|
|
"interrupted": interrupted,
|
2025-09-10 00:43:55 -07:00
|
|
|
|
}
|
2026-02-03 16:15:49 -08:00
|
|
|
|
|
|
|
|
|
|
# Include interrupt message if one triggered the interrupt
|
|
|
|
|
|
if interrupted and self._interrupt_message:
|
|
|
|
|
|
result["interrupt_message"] = self._interrupt_message
|
|
|
|
|
|
|
|
|
|
|
|
# Clear interrupt state after handling
|
|
|
|
|
|
self.clear_interrupt()
|
|
|
|
|
|
|
|
|
|
|
|
return result
|
2025-09-10 00:43:55 -07:00
|
|
|
|
|
|
|
|
|
|
def chat(self, message: str) -> str:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Simple chat interface that returns just the final response.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
message (str): User message
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
str: Final assistant response
|
|
|
|
|
|
"""
|
|
|
|
|
|
result = self.run_conversation(message)
|
|
|
|
|
|
return result["final_response"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main(
|
|
|
|
|
|
query: str = None,
|
2026-02-08 10:49:24 +00:00
|
|
|
|
model: str = "anthropic/claude-opus-4.6",
|
2025-09-10 00:43:55 -07:00
|
|
|
|
api_key: str = None,
|
2026-01-29 06:10:24 +00:00
|
|
|
|
base_url: str = "https://openrouter.ai/api/v1",
|
2025-09-10 00:43:55 -07:00
|
|
|
|
max_turns: int = 10,
|
|
|
|
|
|
enabled_toolsets: str = None,
|
|
|
|
|
|
disabled_toolsets: str = None,
|
|
|
|
|
|
list_tools: bool = False,
|
2025-09-10 00:51:41 -07:00
|
|
|
|
save_trajectories: bool = False,
|
2026-01-29 06:10:24 +00:00
|
|
|
|
save_sample: bool = False,
|
2025-11-07 14:43:44 -05:00
|
|
|
|
verbose: bool = False,
|
|
|
|
|
|
log_prefix_chars: int = 20
|
2025-09-10 00:43:55 -07:00
|
|
|
|
):
|
|
|
|
|
|
"""
|
|
|
|
|
|
Main function for running the agent directly.
|
2025-11-07 14:43:44 -05:00
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
Args:
|
|
|
|
|
|
query (str): Natural language query for the agent. Defaults to Python 3.13 example.
|
2026-03-07 09:06:37 -08:00
|
|
|
|
model (str): Model name to use (OpenRouter format: provider/model). Defaults to anthropic/claude-sonnet-4.6.
|
2026-01-29 06:10:24 +00:00
|
|
|
|
api_key (str): API key for authentication. Uses OPENROUTER_API_KEY env var if not provided.
|
|
|
|
|
|
base_url (str): Base URL for the model API. Defaults to https://openrouter.ai/api/v1
|
2025-09-10 00:43:55 -07:00
|
|
|
|
max_turns (int): Maximum number of API call iterations. Defaults to 10.
|
2025-11-07 14:43:44 -05:00
|
|
|
|
enabled_toolsets (str): Comma-separated list of toolsets to enable. Supports predefined
|
|
|
|
|
|
toolsets (e.g., "research", "development", "safe").
|
2025-09-10 00:43:55 -07:00
|
|
|
|
Multiple toolsets can be combined: "web,vision"
|
|
|
|
|
|
disabled_toolsets (str): Comma-separated list of toolsets to disable (e.g., "terminal")
|
|
|
|
|
|
list_tools (bool): Just list available tools and exit
|
2026-01-29 06:10:24 +00:00
|
|
|
|
save_trajectories (bool): Save conversation trajectories to JSONL files (appends to trajectory_samples.jsonl). Defaults to False.
|
|
|
|
|
|
save_sample (bool): Save a single trajectory sample to a UUID-named JSONL file for inspection. Defaults to False.
|
2025-09-10 00:51:41 -07:00
|
|
|
|
verbose (bool): Enable verbose logging for debugging. Defaults to False.
|
2025-11-07 14:43:44 -05:00
|
|
|
|
log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses. Defaults to 20.
|
|
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
Toolset Examples:
|
|
|
|
|
|
- "research": Web search, extract, crawl + vision tools
|
|
|
|
|
|
"""
|
|
|
|
|
|
print("🤖 AI Agent with Tool Calling")
|
|
|
|
|
|
print("=" * 50)
|
|
|
|
|
|
|
|
|
|
|
|
# Handle tool listing
|
|
|
|
|
|
if list_tools:
|
|
|
|
|
|
from model_tools import get_all_tool_names, get_toolset_for_tool, get_available_toolsets
|
|
|
|
|
|
from toolsets import get_all_toolsets, get_toolset_info
|
|
|
|
|
|
|
|
|
|
|
|
print("📋 Available Tools & Toolsets:")
|
|
|
|
|
|
print("-" * 50)
|
|
|
|
|
|
|
|
|
|
|
|
# Show new toolsets system
|
|
|
|
|
|
print("\n🎯 Predefined Toolsets (New System):")
|
|
|
|
|
|
print("-" * 40)
|
|
|
|
|
|
all_toolsets = get_all_toolsets()
|
|
|
|
|
|
|
|
|
|
|
|
# Group by category
|
|
|
|
|
|
basic_toolsets = []
|
|
|
|
|
|
composite_toolsets = []
|
|
|
|
|
|
scenario_toolsets = []
|
|
|
|
|
|
|
|
|
|
|
|
for name, toolset in all_toolsets.items():
|
|
|
|
|
|
info = get_toolset_info(name)
|
|
|
|
|
|
if info:
|
|
|
|
|
|
entry = (name, info)
|
|
|
|
|
|
if name in ["web", "terminal", "vision", "creative", "reasoning"]:
|
|
|
|
|
|
basic_toolsets.append(entry)
|
|
|
|
|
|
elif name in ["research", "development", "analysis", "content_creation", "full_stack"]:
|
|
|
|
|
|
composite_toolsets.append(entry)
|
|
|
|
|
|
else:
|
|
|
|
|
|
scenario_toolsets.append(entry)
|
|
|
|
|
|
|
|
|
|
|
|
# Print basic toolsets
|
|
|
|
|
|
print("\n📌 Basic Toolsets:")
|
|
|
|
|
|
for name, info in basic_toolsets:
|
|
|
|
|
|
tools_str = ', '.join(info['resolved_tools']) if info['resolved_tools'] else 'none'
|
|
|
|
|
|
print(f" • {name:15} - {info['description']}")
|
|
|
|
|
|
print(f" Tools: {tools_str}")
|
|
|
|
|
|
|
|
|
|
|
|
# Print composite toolsets
|
|
|
|
|
|
print("\n📂 Composite Toolsets (built from other toolsets):")
|
|
|
|
|
|
for name, info in composite_toolsets:
|
|
|
|
|
|
includes_str = ', '.join(info['includes']) if info['includes'] else 'none'
|
|
|
|
|
|
print(f" • {name:15} - {info['description']}")
|
|
|
|
|
|
print(f" Includes: {includes_str}")
|
|
|
|
|
|
print(f" Total tools: {info['tool_count']}")
|
|
|
|
|
|
|
|
|
|
|
|
# Print scenario-specific toolsets
|
|
|
|
|
|
print("\n🎭 Scenario-Specific Toolsets:")
|
|
|
|
|
|
for name, info in scenario_toolsets:
|
|
|
|
|
|
print(f" • {name:20} - {info['description']}")
|
|
|
|
|
|
print(f" Total tools: {info['tool_count']}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Show legacy toolset compatibility
|
|
|
|
|
|
print("\n📦 Legacy Toolsets (for backward compatibility):")
|
|
|
|
|
|
legacy_toolsets = get_available_toolsets()
|
|
|
|
|
|
for name, info in legacy_toolsets.items():
|
|
|
|
|
|
status = "✅" if info["available"] else "❌"
|
|
|
|
|
|
print(f" {status} {name}: {info['description']}")
|
|
|
|
|
|
if not info["available"]:
|
|
|
|
|
|
print(f" Requirements: {', '.join(info['requirements'])}")
|
|
|
|
|
|
|
|
|
|
|
|
# Show individual tools
|
|
|
|
|
|
all_tools = get_all_tool_names()
|
|
|
|
|
|
print(f"\n🔧 Individual Tools ({len(all_tools)} available):")
|
|
|
|
|
|
for tool_name in sorted(all_tools):
|
|
|
|
|
|
toolset = get_toolset_for_tool(tool_name)
|
|
|
|
|
|
print(f" 📌 {tool_name} (from {toolset})")
|
|
|
|
|
|
|
|
|
|
|
|
print(f"\n💡 Usage Examples:")
|
|
|
|
|
|
print(f" # Use predefined toolsets")
|
|
|
|
|
|
print(f" python run_agent.py --enabled_toolsets=research --query='search for Python news'")
|
|
|
|
|
|
print(f" python run_agent.py --enabled_toolsets=development --query='debug this code'")
|
|
|
|
|
|
print(f" python run_agent.py --enabled_toolsets=safe --query='analyze without terminal'")
|
|
|
|
|
|
print(f" ")
|
|
|
|
|
|
print(f" # Combine multiple toolsets")
|
|
|
|
|
|
print(f" python run_agent.py --enabled_toolsets=web,vision --query='analyze website'")
|
|
|
|
|
|
print(f" ")
|
|
|
|
|
|
print(f" # Disable toolsets")
|
|
|
|
|
|
print(f" python run_agent.py --disabled_toolsets=terminal --query='no command execution'")
|
|
|
|
|
|
print(f" ")
|
|
|
|
|
|
print(f" # Run with trajectory saving enabled")
|
|
|
|
|
|
print(f" python run_agent.py --save_trajectories --query='your question here'")
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
# Parse toolset selection arguments
|
|
|
|
|
|
enabled_toolsets_list = None
|
|
|
|
|
|
disabled_toolsets_list = None
|
|
|
|
|
|
|
|
|
|
|
|
if enabled_toolsets:
|
|
|
|
|
|
enabled_toolsets_list = [t.strip() for t in enabled_toolsets.split(",")]
|
|
|
|
|
|
print(f"🎯 Enabled toolsets: {enabled_toolsets_list}")
|
|
|
|
|
|
|
|
|
|
|
|
if disabled_toolsets:
|
|
|
|
|
|
disabled_toolsets_list = [t.strip() for t in disabled_toolsets.split(",")]
|
|
|
|
|
|
print(f"🚫 Disabled toolsets: {disabled_toolsets_list}")
|
|
|
|
|
|
|
|
|
|
|
|
if save_trajectories:
|
|
|
|
|
|
print(f"💾 Trajectory saving: ENABLED")
|
|
|
|
|
|
print(f" - Successful conversations → trajectory_samples.jsonl")
|
|
|
|
|
|
print(f" - Failed conversations → failed_trajectories.jsonl")
|
|
|
|
|
|
|
|
|
|
|
|
# Initialize agent with provided parameters
|
|
|
|
|
|
try:
|
|
|
|
|
|
agent = AIAgent(
|
|
|
|
|
|
base_url=base_url,
|
|
|
|
|
|
model=model,
|
|
|
|
|
|
api_key=api_key,
|
|
|
|
|
|
max_iterations=max_turns,
|
|
|
|
|
|
enabled_toolsets=enabled_toolsets_list,
|
|
|
|
|
|
disabled_toolsets=disabled_toolsets_list,
|
2025-09-10 00:51:41 -07:00
|
|
|
|
save_trajectories=save_trajectories,
|
2025-11-07 14:43:44 -05:00
|
|
|
|
verbose_logging=verbose,
|
|
|
|
|
|
log_prefix_chars=log_prefix_chars
|
2025-09-10 00:43:55 -07:00
|
|
|
|
)
|
|
|
|
|
|
except RuntimeError as e:
|
|
|
|
|
|
print(f"❌ Failed to initialize agent: {e}")
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
# Use provided query or default to Python 3.13 example
|
|
|
|
|
|
if query is None:
|
|
|
|
|
|
user_query = (
|
|
|
|
|
|
"Tell me about the latest developments in Python 3.13 and what new features "
|
|
|
|
|
|
"developers should know about. Please search for current information and try it out."
|
|
|
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
|
|
|
user_query = query
|
|
|
|
|
|
|
|
|
|
|
|
print(f"\n📝 User Query: {user_query}")
|
|
|
|
|
|
print("\n" + "=" * 50)
|
|
|
|
|
|
|
|
|
|
|
|
# Run conversation
|
|
|
|
|
|
result = agent.run_conversation(user_query)
|
|
|
|
|
|
|
|
|
|
|
|
print("\n" + "=" * 50)
|
|
|
|
|
|
print("📋 CONVERSATION SUMMARY")
|
|
|
|
|
|
print("=" * 50)
|
|
|
|
|
|
print(f"✅ Completed: {result['completed']}")
|
|
|
|
|
|
print(f"📞 API Calls: {result['api_calls']}")
|
|
|
|
|
|
print(f"💬 Messages: {len(result['messages'])}")
|
|
|
|
|
|
|
|
|
|
|
|
if result['final_response']:
|
|
|
|
|
|
print(f"\n🎯 FINAL RESPONSE:")
|
|
|
|
|
|
print("-" * 30)
|
|
|
|
|
|
print(result['final_response'])
|
|
|
|
|
|
|
2026-01-29 06:10:24 +00:00
|
|
|
|
# Save sample trajectory to UUID-named file if requested
|
|
|
|
|
|
if save_sample:
|
|
|
|
|
|
sample_id = str(uuid.uuid4())[:8]
|
2026-01-30 07:34:58 +00:00
|
|
|
|
sample_filename = f"sample_{sample_id}.json"
|
2026-01-29 06:10:24 +00:00
|
|
|
|
|
|
|
|
|
|
# Convert messages to trajectory format (same as batch_runner)
|
|
|
|
|
|
trajectory = agent._convert_to_trajectory_format(
|
|
|
|
|
|
result['messages'],
|
|
|
|
|
|
user_query,
|
|
|
|
|
|
result['completed']
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
entry = {
|
|
|
|
|
|
"conversations": trajectory,
|
|
|
|
|
|
"timestamp": datetime.now().isoformat(),
|
|
|
|
|
|
"model": model,
|
|
|
|
|
|
"completed": result['completed'],
|
|
|
|
|
|
"query": user_query
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
with open(sample_filename, "w", encoding="utf-8") as f:
|
2026-01-30 07:34:58 +00:00
|
|
|
|
# Pretty-print JSON with indent for readability
|
|
|
|
|
|
f.write(json.dumps(entry, ensure_ascii=False, indent=2))
|
2026-01-29 06:10:24 +00:00
|
|
|
|
print(f"\n💾 Sample trajectory saved to: {sample_filename}")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"\n⚠️ Failed to save sample: {e}")
|
|
|
|
|
|
|
2025-09-10 00:43:55 -07:00
|
|
|
|
print("\n👋 Agent execution completed!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
|
|
fire.Fire(main)
|