feat: call_llm/async_call_llm + config slots + migrate all consumers
Add centralized call_llm() and async_call_llm() functions that own the full LLM request lifecycle:

1. Resolve provider + model from task config or explicit args.
2. Get or create a cached client for that provider.
3. Format request args (max_tokens handling, provider extra_body).
4. Make the API call with max_tokens/max_completion_tokens retry.
5. Return the response.

Config: expanded auxiliary section with provider:model slots for all tasks (compression, vision, web_extract, session_search, skills_hub, mcp, flush_memories). Config version bumped to 7.

Migrated all auxiliary consumers:

- context_compressor.py: uses call_llm(task='compression')
- vision_tools.py: uses async_call_llm(task='vision')
- web_tools.py: uses async_call_llm(task='web_extract')
- session_search_tool.py: uses async_call_llm(task='session_search')
- browser_tool.py: uses call_llm(task='vision'/'web_extract')
- mcp_tool.py: uses call_llm(task='mcp')
- skills_guard.py: uses call_llm(provider='openrouter')
- run_agent.py flush_memories: uses call_llm(task='flush_memories')

Tests updated for context_compressor and MCP tool. Some test mocks still need updating (15 remaining failures from mock pattern changes, 2 pre-existing).
This commit is contained in:
@@ -22,13 +22,7 @@ import os
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional, Union
|
||||
|
||||
from openai import AsyncOpenAI, OpenAI
|
||||
|
||||
from agent.auxiliary_client import get_async_text_auxiliary_client
|
||||
|
||||
# Resolve the async auxiliary client at import time so we have the model slug.
|
||||
# Handles Codex Responses API adapter transparently.
|
||||
_async_aux_client, _SUMMARIZER_MODEL = get_async_text_auxiliary_client()
|
||||
from agent.auxiliary_client import async_call_llm
|
||||
MAX_SESSION_CHARS = 100_000
|
||||
MAX_SUMMARY_TOKENS = 10000
|
||||
|
||||
@@ -156,26 +150,22 @@ async def _summarize_session(
|
||||
f"Summarize this conversation with focus on: {query}"
|
||||
)
|
||||
|
||||
if _async_aux_client is None or _SUMMARIZER_MODEL is None:
|
||||
logging.warning("No auxiliary model available for session summarization")
|
||||
return None
|
||||
|
||||
max_retries = 3
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
|
||||
_extra = get_auxiliary_extra_body()
|
||||
response = await _async_aux_client.chat.completions.create(
|
||||
model=_SUMMARIZER_MODEL,
|
||||
response = await async_call_llm(
|
||||
task="session_search",
|
||||
messages=[
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_prompt},
|
||||
],
|
||||
**({} if not _extra else {"extra_body": _extra}),
|
||||
temperature=0.1,
|
||||
**auxiliary_max_tokens_param(MAX_SUMMARY_TOKENS),
|
||||
max_tokens=MAX_SUMMARY_TOKENS,
|
||||
)
|
||||
return response.choices[0].message.content.strip()
|
||||
except RuntimeError:
|
||||
logging.warning("No auxiliary model available for session summarization")
|
||||
return None
|
||||
except Exception as e:
|
||||
if attempt < max_retries - 1:
|
||||
await asyncio.sleep(1 * (attempt + 1))
|
||||
@@ -333,8 +323,6 @@ def session_search(
|
||||
|
||||
def check_session_search_requirements() -> bool:
|
||||
"""Requires SQLite state database and an auxiliary text model."""
|
||||
if _async_aux_client is None:
|
||||
return False
|
||||
try:
|
||||
from hermes_state import DEFAULT_DB_PATH
|
||||
return DEFAULT_DB_PATH.parent.exists()
|
||||
|
||||
Reference in New Issue
Block a user