Merge branch 'main' into codex/align-codex-provider-conventions-mainrepo
run_agent.py (374 changed lines)
@@ -128,6 +128,7 @@ class AIAgent:
         session_id: str = None,
         tool_progress_callback: callable = None,
         clarify_callback: callable = None,
+        step_callback: callable = None,
         max_tokens: int = None,
         reasoning_config: Dict[str, Any] = None,
         prefill_messages: List[Dict[str, Any]] = None,
@@ -135,6 +136,7 @@ class AIAgent:
         skip_context_files: bool = False,
         skip_memory: bool = False,
         session_db=None,
+        honcho_session_key: str = None,
     ):
         """
         Initialize the AI Agent.
@@ -174,6 +176,8 @@ class AIAgent:
             skip_context_files (bool): If True, skip auto-injection of SOUL.md, AGENTS.md, and .cursorrules
                 into the system prompt. Use this for batch processing and data generation to avoid
                 polluting trajectories with user-specific persona or project instructions.
+            honcho_session_key (str): Session key for Honcho integration (e.g., "telegram:123456" or CLI session_id).
+                When provided and Honcho is enabled in config, enables persistent cross-session user modeling.
         """
         self.model = model
         self.max_iterations = max_iterations
@@ -200,8 +204,16 @@ class AIAgent:
             self.provider = "openai-codex"
         else:
             self.api_mode = "chat_completions"
+            if base_url and "api.anthropic.com" in base_url.strip().lower():
+                raise ValueError(
+                    "Anthropic's native /v1/messages API is not supported yet (planned for a future release). "
+                    "Hermes currently requires OpenAI-compatible /chat/completions endpoints. "
+                    "To use Claude models now, route through OpenRouter (OPENROUTER_API_KEY) "
+                    "or any OpenAI-compatible proxy that wraps the Anthropic API."
+                )
         self.tool_progress_callback = tool_progress_callback
         self.clarify_callback = clarify_callback
+        self.step_callback = step_callback
         self._last_reported_tool = None  # Track for "new tool" mode

         # Interrupt mechanism for breaking out of tool loops
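For reference, the guard above means Claude access goes through an OpenAI-compatible gateway; a minimal caller sketch (the model slug and api_key parameter are assumptions, not taken from this diff):

    import os
    agent = AIAgent(
        model="anthropic/claude-sonnet-4",          # hypothetical OpenRouter model slug
        base_url="https://openrouter.ai/api/v1",    # OpenAI-compatible /chat/completions
        api_key=os.environ["OPENROUTER_API_KEY"],   # assumed constructor parameter
    )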
@@ -304,7 +316,7 @@ class AIAgent:
             client_kwargs["default_headers"] = {
                 "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
                 "X-OpenRouter-Title": "Hermes Agent",
-                "X-OpenRouter-Categories": "cli-agent",
+                "X-OpenRouter-Categories": "productivity,cli-agent",
             }

         self._client_kwargs = client_kwargs  # stored for rebuilding after interrupt
@@ -435,6 +447,46 @@ class AIAgent:
         except Exception:
             pass  # Memory is optional -- don't break agent init

+        # Honcho AI-native memory (cross-session user modeling)
+        # Reads ~/.honcho/config.json as the single source of truth.
+        self._honcho = None  # HonchoSessionManager | None
+        self._honcho_session_key = honcho_session_key
+        if not skip_memory:
+            try:
+                from honcho_integration.client import HonchoClientConfig, get_honcho_client
+                hcfg = HonchoClientConfig.from_global_config()
+                if hcfg.enabled and hcfg.api_key:
+                    from honcho_integration.session import HonchoSessionManager
+                    client = get_honcho_client(hcfg)
+                    self._honcho = HonchoSessionManager(
+                        honcho=client,
+                        config=hcfg,
+                        context_tokens=hcfg.context_tokens,
+                    )
+                    # Resolve session key: explicit arg > global sessions map > fallback
+                    if not self._honcho_session_key:
+                        self._honcho_session_key = (
+                            hcfg.resolve_session_name()
+                            or "hermes-default"
+                        )
+                    # Ensure session exists in Honcho
+                    self._honcho.get_or_create(self._honcho_session_key)
+                    # Inject session context into the honcho tool module
+                    from tools.honcho_tools import set_session_context
+                    set_session_context(self._honcho, self._honcho_session_key)
+                    logger.info(
+                        "Honcho active (session: %s, user: %s, workspace: %s)",
+                        self._honcho_session_key, hcfg.peer_name, hcfg.workspace_id,
+                    )
+                else:
+                    if not hcfg.enabled:
+                        logger.debug("Honcho disabled in global config")
+                    elif not hcfg.api_key:
+                        logger.debug("Honcho enabled but no API key configured")
+            except Exception as e:
+                logger.debug("Honcho init failed (non-fatal): %s", e)
+                self._honcho = None
+
         # Skills config: nudge interval for skill creation reminders
         self._skill_nudge_interval = 15
         try:
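The new honcho_session_key argument follows the precedence explicit arg > hcfg.resolve_session_name() > "hermes-default". A hypothetical caller sketch (variable and model names assumed, not from this diff):

    chat_id = 123456                                # hypothetical Telegram chat id
    agent = AIAgent(
        model="Hermes-4-405B",                      # illustrative model name
        honcho_session_key=f"telegram:{chat_id}",   # explicit key wins over the global sessions map
    )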
@@ -446,9 +498,10 @@ class AIAgent:

         # Initialize context compressor for automatic context management
         # Compresses conversation when approaching model's context limit
-        # Configuration via environment variables (can be set in .env or cli-config.yaml)
+        # Configuration via config.yaml (compression section) or environment variables
         compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85"))
         compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes")
+        compression_summary_model = os.getenv("CONTEXT_COMPRESSION_MODEL") or None

         self.context_compressor = ContextCompressor(
             model=self.model,
@@ -456,6 +509,7 @@ class AIAgent:
             protect_first_n=3,
             protect_last_n=4,
             summary_target_tokens=500,
+            summary_model_override=compression_summary_model,
             quiet_mode=self.quiet_mode,
         )
         self.compression_enabled = compression_enabled
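The compression knobs read above come from the environment (or config.yaml, per the updated comment); a sketch of the defaults exactly as the code parses them:

    import os
    os.environ.setdefault("CONTEXT_COMPRESSION_THRESHOLD", "0.85")  # compress at 85% of the context limit
    os.environ.setdefault("CONTEXT_COMPRESSION_ENABLED", "true")    # "true", "1", and "yes" all enable
    # CONTEXT_COMPRESSION_MODEL is optional; unset or empty yields None (no summary-model override)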
@@ -467,6 +521,21 @@ class AIAgent:
         else:
             print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)")

+    def _max_tokens_param(self, value: int) -> dict:
+        """Return the correct max tokens kwarg for the current provider.
+
+        OpenAI's newer models (gpt-4o, o-series, gpt-5+) require
+        'max_completion_tokens'. OpenRouter, local models, and older
+        OpenAI models use 'max_tokens'.
+        """
+        _is_direct_openai = (
+            "api.openai.com" in self.base_url.lower()
+            and "openrouter" not in self.base_url.lower()
+        )
+        if _is_direct_openai:
+            return {"max_completion_tokens": value}
+        return {"max_tokens": value}
+
     def _has_content_after_think_block(self, content: str) -> bool:
         """
         Check if content has actual text after any <think></think> blocks.
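A quick sketch of the dispatch (base_url values illustrative):

    agent.base_url = "https://api.openai.com/v1"
    agent._max_tokens_param(1024)   # -> {"max_completion_tokens": 1024}

    agent.base_url = "https://openrouter.ai/api/v1"
    agent._max_tokens_param(1024)   # -> {"max_tokens": 1024}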
@@ -669,7 +738,7 @@ class AIAgent:
         if not self._session_db:
             return
         try:
-            start_idx = (len(conversation_history) if conversation_history else 0) + 1
+            start_idx = len(conversation_history) if conversation_history else 0
             for msg in messages[start_idx:]:
                 role = msg.get("role", "unknown")
                 content = msg.get("content")
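Why dropping the "+ 1" matters, with illustrative values:

    conversation_history = ["u0", "a0"]       # restored prior turns, len == 2
    messages = ["u0", "a0", "u1", "a1"]       # history plus the new exchange
    messages[len(conversation_history) + 1:]  # old: ["a1"] -- silently skipped the new user turn
    messages[len(conversation_history):]      # new: ["u1", "a1"] -- persists both new messages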
@@ -1016,8 +1085,6 @@ class AIAgent:
         if not content:
             return content
         content = convert_scratchpad_to_think(content)
-        # Strip extra newlines before/after think blocks
-        import re
         content = re.sub(r'\n+(<think>)', r'\n\1', content)
         content = re.sub(r'(</think>)\n+', r'\1\n', content)
         return content.strip()
@@ -1144,7 +1211,67 @@ class AIAgent:
     def is_interrupted(self) -> bool:
         """Check if an interrupt has been requested."""
         return self._interrupt_requested

+    # ── Honcho integration helpers ──
+
+    def _honcho_prefetch(self, user_message: str) -> str:
+        """Fetch user context from Honcho for system prompt injection.
+
+        Returns a formatted context block, or empty string if unavailable.
+        """
+        if not self._honcho or not self._honcho_session_key:
+            return ""
+        try:
+            ctx = self._honcho.get_prefetch_context(self._honcho_session_key, user_message)
+            if not ctx:
+                return ""
+            parts = []
+            rep = ctx.get("representation", "")
+            card = ctx.get("card", "")
+            if rep:
+                parts.append(rep)
+            if card:
+                parts.append(card)
+            if not parts:
+                return ""
+            return "# Honcho User Context\n" + "\n\n".join(parts)
+        except Exception as e:
+            logger.debug("Honcho prefetch failed (non-fatal): %s", e)
+            return ""
+
+    def _honcho_save_user_observation(self, content: str) -> str:
+        """Route a memory tool target=user add to Honcho.
+
+        Sends the content as a user peer message so Honcho's reasoning
+        model can incorporate it into the user representation.
+        """
+        if not content or not content.strip():
+            return json.dumps({"success": False, "error": "Content cannot be empty."})
+        try:
+            session = self._honcho.get_or_create(self._honcho_session_key)
+            session.add_message("user", f"[observation] {content.strip()}")
+            self._honcho.save(session)
+            return json.dumps({
+                "success": True,
+                "target": "user",
+                "message": "Saved to Honcho user model.",
+            })
+        except Exception as e:
+            logger.debug("Honcho user observation failed: %s", e)
+            return json.dumps({"success": False, "error": f"Honcho save failed: {e}"})
+
+    def _honcho_sync(self, user_content: str, assistant_content: str) -> None:
+        """Sync the user/assistant message pair to Honcho."""
+        if not self._honcho or not self._honcho_session_key:
+            return
+        try:
+            session = self._honcho.get_or_create(self._honcho_session_key)
+            session.add_message("user", user_content)
+            session.add_message("assistant", assistant_content)
+            self._honcho.save(session)
+        except Exception as e:
+            logger.debug("Honcho sync failed (non-fatal): %s", e)
+
     def _build_system_prompt(self, system_message: str = None) -> str:
         """
         Assemble the full system prompt from all layers.
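A sketch of the block _honcho_prefetch produces, assuming get_prefetch_context returns the "representation" and "card" keys used above:

    ctx = {"representation": "Prefers terse answers.", "card": "Timezone: UTC+2"}  # illustrative values
    parts = [v for v in (ctx.get("representation", ""), ctx.get("card", "")) if v]
    block = "# Honcho User Context\n" + "\n\n".join(parts)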
@@ -1184,6 +1311,7 @@ class AIAgent:
             mem_block = self._memory_store.format_for_system_prompt("memory")
             if mem_block:
                 prompt_parts.append(mem_block)
+            # USER.md is always included when enabled -- Honcho prefetch is additive.
             if self._user_profile_enabled:
                 user_block = self._memory_store.format_for_system_prompt("user")
                 if user_block:
@@ -1865,11 +1993,11 @@ class AIAgent:
             "model": self.model,
             "messages": api_messages,
             "tools": self.tools if self.tools else None,
-            "timeout": 600.0,
+            "timeout": 900.0,
         }

         if self.max_tokens is not None:
-            api_kwargs["max_tokens"] = self.max_tokens
+            api_kwargs.update(self._max_tokens_param(self.max_tokens))

         extra_body = {}

@@ -1994,7 +2122,8 @@ class AIAgent:
                 "[System: The session is being compressed. "
                 "Please save anything worth remembering to your memories.]"
             )
-            flush_msg = {"role": "user", "content": flush_content}
+            _sentinel = f"__flush_{id(self)}_{time.monotonic()}"
+            flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel}
            messages.append(flush_msg)

            try:
@@ -2023,50 +2152,50 @@ class AIAgent:
                 messages.pop()  # remove flush msg
                 return

-            api_kwargs = {
-                "model": self.model,
-                "messages": api_messages,
-                "tools": [memory_tool_def],
-                "temperature": 0.3,
-                "max_tokens": 1024,
-            }
-            response = self.client.chat.completions.create(**api_kwargs, timeout=30.0)
-            if not response.choices:
-                assistant_message = None
-            else:
-                assistant_message = response.choices[0].message
-
-            if assistant_message and assistant_message.tool_calls:
-                # Execute only memory tool calls
-                for tc in assistant_message.tool_calls:
-                    if tc.function.name == "memory":
-                        try:
-                            args = json.loads(tc.function.arguments)
-                            from tools.memory_tool import memory_tool as _memory_tool
-                            _memory_tool(
-                                action=args.get("action"),
-                                target=args.get("target", "memory"),
-                                content=args.get("content"),
-                                old_text=args.get("old_text"),
-                                store=self._memory_store,
-                            )
-                            if not self.quiet_mode:
-                                print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
-                        except Exception as e:
-                            logger.debug("Memory flush tool call failed: %s", e)
+            if self.api_mode == "codex_responses":
+                codex_kwargs = self._build_api_kwargs(api_messages)
+                codex_kwargs["tools"] = self._responses_tools([memory_tool_def])
+                response = self._run_codex_stream(codex_kwargs)
+                assistant_message, _ = self._normalize_codex_response(response)
+            else:
+                api_kwargs = {
+                    "model": self.model,
+                    "messages": api_messages,
+                    "tools": [memory_tool_def],
+                    "temperature": 0.3,
+                    **self._max_tokens_param(1024),
+                }
+                response = self.client.chat.completions.create(**api_kwargs, timeout=30.0)
+
+                if response.choices:
+                    assistant_message = response.choices[0].message
+
+            if assistant_message and assistant_message.tool_calls:
+                # Execute only memory tool calls
+                for tc in assistant_message.tool_calls:
+                    if tc.function.name == "memory":
+                        try:
+                            args = json.loads(tc.function.arguments)
+                            flush_target = args.get("target", "memory")
+                            from tools.memory_tool import memory_tool as _memory_tool
+                            result = _memory_tool(
+                                action=args.get("action"),
+                                target=flush_target,
+                                content=args.get("content"),
+                                old_text=args.get("old_text"),
+                                store=self._memory_store,
+                            )
+                            # Also send user observations to Honcho when active
+                            if self._honcho and flush_target == "user" and args.get("action") == "add":
+                                self._honcho_save_user_observation(args.get("content", ""))
+                            if not self.quiet_mode:
+                                print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
+                        except Exception as e:
+                            logger.debug("Memory flush tool call failed: %s", e)
             except Exception as e:
                 logger.debug("Memory flush API call failed: %s", e)
             finally:
-                # Strip flush artifacts: remove everything from the flush message onward
-                while messages and messages[-1] is not flush_msg and len(messages) > 0:
+                # Strip flush artifacts: remove everything from the flush message onward.
+                # Use sentinel marker instead of identity check for robustness.
+                while messages and messages[-1].get("_flush_sentinel") != _sentinel:
                     messages.pop()
+                    if not messages:
+                        break
-                if messages and messages[-1] is flush_msg:
+                if messages and messages[-1].get("_flush_sentinel") == _sentinel:
                     messages.pop()

     def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None) -> tuple:
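The sentinel replaces an identity check that breaks once the list holds a copy of the flush message (e.g., after history is rebuilt); a toy illustration:

    import copy
    flush_msg = {"role": "user", "content": "...", "_flush_sentinel": "__flush_1"}
    msgs = copy.deepcopy([flush_msg])
    msgs[-1] is flush_msg                            # False: identity is lost on copy
    msgs[-1].get("_flush_sentinel") == "__flush_1"   # True: the marker survives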
@@ -2163,26 +2292,33 @@ class AIAgent:
                     tool_duration = time.time() - tool_start_time
                     if self.quiet_mode:
                         print(f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
-                elif function_name == "session_search" and self._session_db:
-                    from tools.session_search_tool import session_search as _session_search
-                    function_result = _session_search(
-                        query=function_args.get("query", ""),
-                        role_filter=function_args.get("role_filter"),
-                        limit=function_args.get("limit", 3),
-                        db=self._session_db,
-                    )
+                elif function_name == "session_search":
+                    if not self._session_db:
+                        function_result = json.dumps({"success": False, "error": "Session database not available."})
+                    else:
+                        from tools.session_search_tool import session_search as _session_search
+                        function_result = _session_search(
+                            query=function_args.get("query", ""),
+                            role_filter=function_args.get("role_filter"),
+                            limit=function_args.get("limit", 3),
+                            db=self._session_db,
+                        )
                     tool_duration = time.time() - tool_start_time
                     if self.quiet_mode:
                         print(f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
                 elif function_name == "memory":
+                    target = function_args.get("target", "memory")
                     from tools.memory_tool import memory_tool as _memory_tool
                     function_result = _memory_tool(
                         action=function_args.get("action"),
-                        target=function_args.get("target", "memory"),
+                        target=target,
                         content=function_args.get("content"),
                         old_text=function_args.get("old_text"),
                         store=self._memory_store,
                     )
+                    # Also send user observations to Honcho when active
+                    if self._honcho and target == "user" and function_args.get("action") == "add":
+                        self._honcho_save_user_observation(function_args.get("content", ""))
                     tool_duration = time.time() - tool_start_time
                     if self.quiet_mode:
                         print(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
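An illustrative memory call that now reaches both stores (argument shape as used above):

    function_args = {"action": "add", "target": "user", "content": "Prefers dark mode"}
    # _memory_tool(...) updates the local profile; because target == "user" and
    # action == "add", _honcho_save_user_observation("Prefers dark mode") also fires.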
@@ -2258,12 +2394,19 @@ class AIAgent:
                         try:
                             function_result = handle_function_call(function_name, function_args, effective_task_id)
                             _spinner_result = function_result
+                        except Exception as tool_error:
+                            function_result = f"Error executing tool '{function_name}': {tool_error}"
+                            logger.error("handle_function_call raised for %s: %s", function_name, tool_error)
                         finally:
                             tool_duration = time.time() - tool_start_time
                             cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result)
                             spinner.stop(cute_msg)
                     else:
-                        function_result = handle_function_call(function_name, function_args, effective_task_id)
+                        try:
+                            function_result = handle_function_call(function_name, function_args, effective_task_id)
+                        except Exception as tool_error:
+                            function_result = f"Error executing tool '{function_name}': {tool_error}"
+                            logger.error("handle_function_call raised for %s: %s", function_name, tool_error)
                         tool_duration = time.time() - tool_start_time

                     result_preview = function_result[:200] if len(function_result) > 200 else function_result
@@ -2350,12 +2493,19 @@ class AIAgent:
             if _is_nous:
                 summary_extra_body["tags"] = ["product=hermes-agent"]

+            if self.api_mode == "codex_responses":
+                summary_kwargs = self._build_api_kwargs(api_messages)
+                summary_kwargs["tools"] = None
+                summary_response = self._run_codex_stream(summary_kwargs)
+                assistant_message, _ = self._normalize_codex_response(summary_response)
+                final_response = assistant_message.content or ""
+            else:
                 summary_kwargs = {
                     "model": self.model,
                     "messages": api_messages,
                 }
                 if self.max_tokens is not None:
-                    summary_kwargs["max_tokens"] = self.max_tokens
+                    summary_kwargs.update(self._max_tokens_param(self.max_tokens))
                 if summary_extra_body:
                     summary_kwargs["extra_body"] = summary_extra_body

                 summary_response = self.client.chat.completions.create(**summary_kwargs)

                 if summary_response.choices and summary_response.choices[0].message.content:
                     final_response = summary_response.choices[0].message.content
                     if "<think>" in final_response:
                         final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
                     if final_response:
@@ -2435,6 +2585,10 @@ class AIAgent:
         # Track user turns for memory flush and periodic nudge logic
         self._user_turn_count += 1

+        # Preserve the original user message before nudge injection.
+        # Honcho should receive the actual user input, not system nudges.
+        original_user_message = user_message
+
         # Periodic memory nudge: remind the model to consider saving memories.
         # Counter resets whenever the memory tool is actually used.
         if (self._memory_nudge_interval > 0
@@ -2459,6 +2613,14 @@ class AIAgent:
             )
             self._iters_since_skill = 0

+        # Honcho prefetch: retrieve user context for system prompt injection
+        self._honcho_context = ""
+        if self._honcho and self._honcho_session_key:
+            try:
+                self._honcho_context = self._honcho_prefetch(user_message)
+            except Exception as e:
+                logger.debug("Honcho prefetch failed (non-fatal): %s", e)
+
         # Add user message
         user_msg = {"role": "user", "content": user_message}
         messages.append(user_msg)
@@ -2501,6 +2663,22 @@ class AIAgent:

             api_call_count += 1

+            # Fire step_callback for gateway hooks (agent:step event)
+            if self.step_callback is not None:
+                try:
+                    prev_tools = []
+                    for _m in reversed(messages):
+                        if _m.get("role") == "assistant" and _m.get("tool_calls"):
+                            prev_tools = [
+                                tc["function"]["name"]
+                                for tc in _m["tool_calls"]
+                                if isinstance(tc, dict)
+                            ]
+                            break
+                    self.step_callback(api_call_count, prev_tools)
+                except Exception as _step_err:
+                    logger.debug("step_callback error (iteration %s): %s", api_call_count, _step_err)
+
             # Track tool-calling iterations for skill nudge.
             # Counter resets whenever skill_manage is actually used.
             if (self._skill_nudge_interval > 0
@@ -2538,6 +2716,8 @@ class AIAgent:
             effective_system = active_system_prompt or ""
             if self.ephemeral_system_prompt:
                 effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
+            if self._honcho_context:
+                effective_system = (effective_system + "\n\n" + self._honcho_context).strip()
             if effective_system:
                 api_messages = [{"role": "system", "content": effective_system}] + api_messages

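Condensed, the layering above is equivalent to joining the non-empty parts in order (a sketch, not the literal code):

    effective_system = "\n\n".join(
        part for part in (active_system_prompt or "", self.ephemeral_system_prompt, self._honcho_context) if part
    ).strip()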
@@ -2587,7 +2767,7 @@ class AIAgent:

             finish_reason = "stop"

-            while retry_count <= max_retries:
+            while retry_count < max_retries:
                 try:
                     api_kwargs = self._build_api_kwargs(api_messages)
                     if self.api_mode == "codex_responses":
@@ -2699,6 +2879,7 @@ class AIAgent:
                             if self._interrupt_requested:
                                 print(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
                                 self._persist_session(messages, conversation_history)
+                                self.clear_interrupt()
                                 return {
                                     "final_response": "Operation interrupted.",
                                     "messages": messages,
@@ -2837,6 +3018,7 @@ class AIAgent:
                     if self._interrupt_requested:
                         print(f"{self.log_prefix}⚡ Interrupt detected during error handling, aborting retries.")
                         self._persist_session(messages, conversation_history)
+                        self.clear_interrupt()
                         return {
                             "final_response": "Operation interrupted.",
                             "messages": messages,
@@ -2845,10 +3027,45 @@ class AIAgent:
                             "interrupted": True,
                         }

+                    # Check for 413 payload-too-large BEFORE generic 4xx handler.
+                    # A 413 is a payload-size error — the correct response is to
+                    # compress history and retry, not abort immediately.
+                    status_code = getattr(api_error, "status_code", None)
+                    is_payload_too_large = (
+                        status_code == 413
+                        or 'request entity too large' in error_msg
+                        or 'payload too large' in error_msg
+                        or 'error code: 413' in error_msg
+                    )
+
+                    if is_payload_too_large:
+                        print(f"{self.log_prefix}⚠️ Request payload too large (413) - attempting compression...")
+
+                        original_len = len(messages)
+                        messages, active_system_prompt = self._compress_context(
+                            messages, system_message, approx_tokens=approx_tokens
+                        )
+
+                        if len(messages) < original_len:
+                            print(f"{self.log_prefix} 🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
+                            continue  # Retry with compressed messages
+                        else:
+                            print(f"{self.log_prefix}❌ Payload too large and cannot compress further.")
+                            logging.error(f"{self.log_prefix}413 payload too large. Cannot compress further.")
+                            self._persist_session(messages, conversation_history)
+                            return {
+                                "messages": messages,
+                                "completed": False,
+                                "api_calls": api_call_count,
+                                "error": "Request payload too large (413). Cannot compress further.",
+                                "partial": True
+                            }
+
                     # Check for non-retryable client errors (4xx HTTP status codes).
                     # These indicate a problem with the request itself (bad model ID,
                     # invalid API key, forbidden, etc.) and will never succeed on retry.
-                    is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500
+                    # Note: 413 is excluded — it's handled above via compression.
+                    is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413
                     is_client_error = is_client_status_error or any(phrase in error_msg for phrase in [
                         'error code: 400', 'error code: 401', 'error code: 403',
                         'error code: 404', 'error code: 422',
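The ordering is deliberate: 413 must be classified before the generic 4xx check, or compression would never get a chance. A toy classifier mirroring the logic above:

    def classify(status_code: int) -> str:
        if status_code == 413:
            return "compress-and-retry"   # shrink history, stay in the loop
        if 400 <= status_code < 500:
            return "abort"                # bad key/model/request; retrying cannot help
        return "retry-with-backoff"       # 5xx and transient failures

    assert classify(413) == "compress-and-retry"
    assert classify(401) == "abort"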
@@ -2856,7 +3073,7 @@ class AIAgent:
                         'invalid api key', 'invalid_api_key', 'authentication',
                         'unauthorized', 'forbidden', 'not found',
                     ])

                     if is_client_error:
                         self._dump_api_request_debug(
                             api_kwargs, reason="non_retryable_client_error", error=api_error,
@@ -2876,8 +3093,9 @@ class AIAgent:

                     # Check for non-retryable errors (context length exceeded)
                     is_context_length_error = any(phrase in error_msg for phrase in [
-                        'context length', 'maximum context', 'token limit',
-                        'too many tokens', 'reduce the length', 'exceeds the limit'
+                        'context length', 'maximum context', 'token limit',
+                        'too many tokens', 'reduce the length', 'exceeds the limit',
+                        'request entity too large',  # OpenRouter/Nous 413 safety net
                     ])

                     if is_context_length_error:
@@ -2912,9 +3130,10 @@ class AIAgent:
                         raise api_error

                     wait_time = min(2 ** retry_count, 60)  # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s, 60s
-                    print(f"⚠️ OpenAI-compatible API call failed (attempt {retry_count}/{max_retries}): {str(api_error)[:100]}")
-                    print(f"⏳ Retrying in {wait_time}s...")
                     logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}")
+                    if retry_count >= max_retries:
+                        print(f"{self.log_prefix}⚠️ API call failed after {retry_count} attempts: {str(api_error)[:100]}")
+                        print(f"{self.log_prefix}⏳ Final retry in {wait_time}s...")

                     # Sleep in small increments so we can respond to interrupts quickly
                     # instead of blocking the entire wait_time in one sleep() call
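The schedule this formula yields for attempts 1 through 7:

    [min(2 ** n, 60) for n in range(1, 8)]   # -> [2, 4, 8, 16, 32, 60, 60]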
@@ -2923,6 +3142,7 @@ class AIAgent:
                         if self._interrupt_requested:
                             print(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.")
                             self._persist_session(messages, conversation_history)
+                            self.clear_interrupt()
                             return {
                                 "final_response": "Operation interrupted.",
                                 "messages": messages,
@@ -3194,7 +3414,8 @@ class AIAgent:
                             tool_names.append(fn.get("name", "unknown"))
                         msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
                         break
-                final_response = fallback
+                # Strip <think> blocks from fallback content for user display
+                final_response = self._strip_think_blocks(fallback).strip()
                 break

             # No fallback -- append the empty message as-is
@@ -3253,6 +3474,9 @@ class AIAgent:

             codex_ack_continuations = 0

+            # Strip <think> blocks from user-facing response (keep raw in messages for trajectory)
+            final_response = self._strip_think_blocks(final_response).strip()
+
             final_msg = self._build_assistant_message(assistant_message, finish_reason)

             messages.append(final_msg)
@@ -3327,7 +3551,11 @@ class AIAgent:

         # Persist session to both JSON log and SQLite
         self._persist_session(messages, conversation_history)

+        # Sync conversation to Honcho for user modeling
+        if final_response and not interrupted:
+            self._honcho_sync(original_user_message, final_response)
+
         # Build result with interrupt info if applicable
         result = {
             "final_response": final_response,