Files
hermes-agent/tests/tools/test_browser_vision_model.py
Alexander Whitestone 671283389c
All checks were successful
Lint / lint (pull_request) Successful in 8s
feat: Wire Gemma 4 vision into browser_tool for screenshot analysis
_get_vision_model() now resolves via a layered priority chain:
  1. BROWSER_VISION_MODEL env var (browser-specific override)
  2. config.yaml browser.vision_model
  3. AUXILIARY_VISION_MODEL env var (backward-compat shared override)
  4. google/gemma-4-27b-it — Gemma 4 native multimodal default

Add browser.vision_model config key to hermes_cli/config.py defaults
with inline documentation.

call_kwargs["model"] is now always set (model is never None), and a
debug log line records which model is in use for each screenshot.

Fixes #816

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 20:51:04 -04:00

116 lines
5.6 KiB
Python

"""Tests for browser_tool._get_vision_model() — Gemma 4 default (Issue #816).
Covers acceptance criteria from issue #816:
- Browser screenshots use Gemma 4 by default.
- BROWSER_VISION_MODEL env var overrides the model for browser vision only.
- AUXILIARY_VISION_MODEL env var still works as a global override.
- auxiliary.browser_vision.model in config.yaml overrides the default.
- Priority: BROWSER_VISION_MODEL > config.yaml > AUXILIARY_VISION_MODEL > default.
"""
import os
import sys
from unittest.mock import patch, MagicMock
import pytest
class TestGetVisionModelDefault:
    """Default model resolution when nothing overrides it."""

    def test_default_is_gemma4(self, monkeypatch):
        """With both env vars unset and an empty config, Gemma 4 is returned."""
        for env_name in ("BROWSER_VISION_MODEL", "AUXILIARY_VISION_MODEL"):
            monkeypatch.delenv(env_name, raising=False)
        import tools.browser_tool as browser_tool

        with patch("hermes_cli.config.load_config", return_value={}):
            resolved = browser_tool._get_vision_model()
        assert resolved == "google/gemma-4-27b-it"

    def test_default_constant(self):
        """The module-level default constant names the Gemma 4 model."""
        import tools.browser_tool as browser_tool

        expected = "google/gemma-4-27b-it"
        assert browser_tool._BROWSER_VISION_DEFAULT_MODEL == expected
class TestGetVisionModelEnvOverrides:
    """Environment-variable overrides and their fall-through behaviour."""

    def test_browser_vision_model_env_takes_priority(self, monkeypatch):
        """BROWSER_VISION_MODEL wins when both env vars are set."""
        monkeypatch.setenv("BROWSER_VISION_MODEL", "openai/gpt-4o")
        monkeypatch.setenv("AUXILIARY_VISION_MODEL", "google/gemini-3-flash-preview")
        import tools.browser_tool as browser_tool

        resolved = browser_tool._get_vision_model()
        assert resolved == "openai/gpt-4o"

    def test_auxiliary_vision_model_fallback(self, monkeypatch):
        """AUXILIARY_VISION_MODEL is used when the browser-specific var is unset."""
        monkeypatch.delenv("BROWSER_VISION_MODEL", raising=False)
        monkeypatch.setenv("AUXILIARY_VISION_MODEL", "google/gemini-3-flash-preview")
        import tools.browser_tool as browser_tool

        with patch("hermes_cli.config.load_config", return_value={}):
            resolved = browser_tool._get_vision_model()
            assert resolved == "google/gemini-3-flash-preview"

    def test_browser_vision_model_empty_falls_through(self, monkeypatch):
        """Empty BROWSER_VISION_MODEL should fall through to next step."""
        monkeypatch.setenv("BROWSER_VISION_MODEL", "")
        monkeypatch.delenv("AUXILIARY_VISION_MODEL", raising=False)
        import tools.browser_tool as browser_tool

        with patch("hermes_cli.config.load_config", return_value={}):
            # Nothing else is configured, so the chain ends at the default.
            resolved = browser_tool._get_vision_model()
            assert resolved == "google/gemma-4-27b-it"

    def test_auxiliary_vision_model_empty_falls_through(self, monkeypatch):
        """Empty AUXILIARY_VISION_MODEL is ignored and the default is returned."""
        monkeypatch.delenv("BROWSER_VISION_MODEL", raising=False)
        monkeypatch.setenv("AUXILIARY_VISION_MODEL", "")
        import tools.browser_tool as browser_tool

        with patch("hermes_cli.config.load_config", return_value={}):
            resolved = browser_tool._get_vision_model()
            assert resolved == "google/gemma-4-27b-it"
class TestGetVisionModelConfig:
    """Resolution through the ``auxiliary.browser_vision.model`` config entry.

    Every test imports ``tools.browser_tool`` *before* entering the
    ``load_config`` patch. If the module were imported for the first time
    while the patch is active and it binds ``load_config`` at import time,
    it would keep a reference to the mock after the patch exits and leak
    into later tests. Importing first also matches the other test classes
    in this file.
    """

    def test_config_overrides_default(self, monkeypatch):
        """A configured model replaces the built-in default."""
        monkeypatch.delenv("BROWSER_VISION_MODEL", raising=False)
        monkeypatch.delenv("AUXILIARY_VISION_MODEL", raising=False)
        import tools.browser_tool as bt

        cfg = {"auxiliary": {"browser_vision": {"model": "anthropic/claude-3-5-haiku"}}}
        with patch("hermes_cli.config.load_config", return_value=cfg):
            assert bt._get_vision_model() == "anthropic/claude-3-5-haiku"

    def test_config_empty_string_falls_through_to_default(self, monkeypatch):
        """An empty configured model is ignored and the default is returned."""
        monkeypatch.delenv("BROWSER_VISION_MODEL", raising=False)
        monkeypatch.delenv("AUXILIARY_VISION_MODEL", raising=False)
        import tools.browser_tool as bt

        cfg = {"auxiliary": {"browser_vision": {"model": ""}}}
        with patch("hermes_cli.config.load_config", return_value=cfg):
            assert bt._get_vision_model() == "google/gemma-4-27b-it"

    def test_config_load_error_falls_through_to_default(self, monkeypatch):
        """A failing ``load_config`` must not propagate; the default is returned."""
        monkeypatch.delenv("BROWSER_VISION_MODEL", raising=False)
        monkeypatch.delenv("AUXILIARY_VISION_MODEL", raising=False)
        import tools.browser_tool as bt

        with patch("hermes_cli.config.load_config", side_effect=Exception("config error")):
            assert bt._get_vision_model() == "google/gemma-4-27b-it"

    def test_env_beats_config(self, monkeypatch):
        """BROWSER_VISION_MODEL takes precedence over the configured model."""
        monkeypatch.setenv("BROWSER_VISION_MODEL", "openai/gpt-4o")
        # Clear the shared override so the ambient environment cannot
        # influence which source the result is attributed to.
        monkeypatch.delenv("AUXILIARY_VISION_MODEL", raising=False)
        import tools.browser_tool as bt

        cfg = {"auxiliary": {"browser_vision": {"model": "anthropic/claude-3-5-haiku"}}}
        with patch("hermes_cli.config.load_config", return_value=cfg):
            assert bt._get_vision_model() == "openai/gpt-4o"

    def test_config_beats_auxiliary_vision_model(self, monkeypatch):
        """Config should override AUXILIARY_VISION_MODEL when BROWSER_VISION_MODEL unset."""
        monkeypatch.delenv("BROWSER_VISION_MODEL", raising=False)
        monkeypatch.setenv("AUXILIARY_VISION_MODEL", "global-override")
        import tools.browser_tool as bt

        cfg = {"auxiliary": {"browser_vision": {"model": "config-model"}}}
        with patch("hermes_cli.config.load_config", return_value=cfg):
            assert bt._get_vision_model() == "config-model"
class TestBackwardCompatibility:
    """AUXILIARY_VISION_MODEL must still work for users who already have it configured."""

    def test_existing_auxiliary_vision_model_not_broken(self, monkeypatch):
        """Users who set AUXILIARY_VISION_MODEL must not be broken by this change."""
        monkeypatch.delenv("BROWSER_VISION_MODEL", raising=False)
        monkeypatch.setenv("AUXILIARY_VISION_MODEL", "openai/gpt-4o")
        import tools.browser_tool as browser_tool

        with patch("hermes_cli.config.load_config", return_value={}):
            resolved = browser_tool._get_vision_model()

        # The user's existing override is honoured ...
        assert resolved == "openai/gpt-4o"
        # ... and is not silently replaced by the Gemma 4 default.
        assert resolved != "google/gemma-4-27b-it"