feat: call_llm/async_call_llm + config slots + migrate all consumers
Add centralized call_llm() and async_call_llm() functions that own the full LLM request lifecycle: 1. Resolve provider + model from task config or explicit args 2. Get or create a cached client for that provider 3. Format request args (max_tokens handling, provider extra_body) 4. Make the API call with max_tokens/max_completion_tokens retry 5. Return the response Config: expanded auxiliary section with provider:model slots for all tasks (compression, vision, web_extract, session_search, skills_hub, mcp, flush_memories). Config version bumped to 7. Migrated all auxiliary consumers: - context_compressor.py: uses call_llm(task='compression') - vision_tools.py: uses async_call_llm(task='vision') - web_tools.py: uses async_call_llm(task='web_extract') - session_search_tool.py: uses async_call_llm(task='session_search') - browser_tool.py: uses call_llm(task='vision'/'web_extract') - mcp_tool.py: uses call_llm(task='mcp') - skills_guard.py: uses call_llm(provider='openrouter') - run_agent.py flush_memories: uses call_llm(task='flush_memories') Tests updated for context_compressor and MCP tool. Some test mocks still need updating (15 remaining failures from mock pattern changes, 2 pre-existing).
This commit is contained in:
@@ -63,7 +63,7 @@ import time
|
||||
import requests
|
||||
from typing import Dict, Any, Optional, List
|
||||
from pathlib import Path
|
||||
from agent.auxiliary_client import get_vision_auxiliary_client, get_text_auxiliary_client
|
||||
from agent.auxiliary_client import call_llm
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -80,38 +80,15 @@ DEFAULT_SESSION_TIMEOUT = 300
|
||||
# Max tokens for snapshot content before summarization
|
||||
SNAPSHOT_SUMMARIZE_THRESHOLD = 8000
|
||||
|
||||
# Vision client — for browser_vision (screenshot analysis)
|
||||
# Wrapped in try/except so a broken auxiliary config doesn't prevent the entire
|
||||
# browser_tool module from importing (which would disable all 10 browser tools).
|
||||
try:
|
||||
_aux_vision_client, _DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
|
||||
except Exception as _init_err:
|
||||
logger.debug("Could not initialise vision auxiliary client: %s", _init_err)
|
||||
_aux_vision_client, _DEFAULT_VISION_MODEL = None, None
|
||||
|
||||
# Text client — for page snapshot summarization (same config as web_extract)
|
||||
try:
|
||||
_aux_text_client, _DEFAULT_TEXT_MODEL = get_text_auxiliary_client("web_extract")
|
||||
except Exception as _init_err:
|
||||
logger.debug("Could not initialise text auxiliary client: %s", _init_err)
|
||||
_aux_text_client, _DEFAULT_TEXT_MODEL = None, None
|
||||
|
||||
# Module-level alias for availability checks
|
||||
EXTRACTION_MODEL = _DEFAULT_TEXT_MODEL or _DEFAULT_VISION_MODEL
|
||||
|
||||
|
||||
def _get_vision_model() -> str:
|
||||
def _get_vision_model() -> Optional[str]:
|
||||
"""Model for browser_vision (screenshot analysis — multimodal)."""
|
||||
return (os.getenv("AUXILIARY_VISION_MODEL", "").strip()
|
||||
or _DEFAULT_VISION_MODEL
|
||||
or "google/gemini-3-flash-preview")
|
||||
return os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
|
||||
|
||||
|
||||
def _get_extraction_model() -> str:
|
||||
def _get_extraction_model() -> Optional[str]:
|
||||
"""Model for page snapshot text summarization — same as web_extract."""
|
||||
return (os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip()
|
||||
or _DEFAULT_TEXT_MODEL
|
||||
or "google/gemini-3-flash-preview")
|
||||
return os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None
|
||||
|
||||
|
||||
def _is_local_mode() -> bool:
|
||||
@@ -941,9 +918,6 @@ def _extract_relevant_content(
|
||||
|
||||
Falls back to simple truncation when no auxiliary text model is configured.
|
||||
"""
|
||||
if _aux_text_client is None:
|
||||
return _truncate_snapshot(snapshot_text)
|
||||
|
||||
if user_task:
|
||||
extraction_prompt = (
|
||||
f"You are a content extractor for a browser automation agent.\n\n"
|
||||
@@ -968,13 +942,16 @@ def _extract_relevant_content(
|
||||
)
|
||||
|
||||
try:
|
||||
from agent.auxiliary_client import auxiliary_max_tokens_param
|
||||
response = _aux_text_client.chat.completions.create(
|
||||
model=_get_extraction_model(),
|
||||
messages=[{"role": "user", "content": extraction_prompt}],
|
||||
**auxiliary_max_tokens_param(4000),
|
||||
temperature=0.1,
|
||||
)
|
||||
call_kwargs = {
|
||||
"task": "web_extract",
|
||||
"messages": [{"role": "user", "content": extraction_prompt}],
|
||||
"max_tokens": 4000,
|
||||
"temperature": 0.1,
|
||||
}
|
||||
model = _get_extraction_model()
|
||||
if model:
|
||||
call_kwargs["model"] = model
|
||||
response = call_llm(**call_kwargs)
|
||||
return response.choices[0].message.content
|
||||
except Exception:
|
||||
return _truncate_snapshot(snapshot_text)
|
||||
@@ -1497,14 +1474,6 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
||||
|
||||
effective_task_id = task_id or "default"
|
||||
|
||||
# Check auxiliary vision client
|
||||
if _aux_vision_client is None or _DEFAULT_VISION_MODEL is None:
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": "Browser vision unavailable: no auxiliary vision model configured. "
|
||||
"Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision."
|
||||
}, ensure_ascii=False)
|
||||
|
||||
# Save screenshot to persistent location so it can be shared with users
|
||||
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
|
||||
screenshots_dir = hermes_home / "browser_screenshots"
|
||||
@@ -1562,14 +1531,13 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
||||
f"Focus on answering the user's specific question."
|
||||
)
|
||||
|
||||
# Use the sync auxiliary vision client directly
|
||||
from agent.auxiliary_client import auxiliary_max_tokens_param
|
||||
# Use the centralized LLM router
|
||||
vision_model = _get_vision_model()
|
||||
logger.debug("browser_vision: analysing screenshot (%d bytes) with model=%s",
|
||||
len(image_data), vision_model)
|
||||
response = _aux_vision_client.chat.completions.create(
|
||||
model=vision_model,
|
||||
messages=[
|
||||
logger.debug("browser_vision: analysing screenshot (%d bytes)",
|
||||
len(image_data))
|
||||
call_kwargs = {
|
||||
"task": "vision",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
@@ -1578,9 +1546,12 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
||||
],
|
||||
}
|
||||
],
|
||||
**auxiliary_max_tokens_param(2000),
|
||||
temperature=0.1,
|
||||
)
|
||||
"max_tokens": 2000,
|
||||
"temperature": 0.1,
|
||||
}
|
||||
if vision_model:
|
||||
call_kwargs["model"] = vision_model
|
||||
response = call_llm(**call_kwargs)
|
||||
|
||||
analysis = response.choices[0].message.content
|
||||
response_data = {
|
||||
|
||||
Reference in New Issue
Block a user