From 9302690e1b71c1abfc2496640f0a8c3a68709d35 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Wed, 11 Mar 2026 22:04:42 -0700 Subject: [PATCH] =?UTF-8?q?refactor:=20remove=20LLM=5FMODEL=20env=20var=20?= =?UTF-8?q?dependency=20=E2=80=94=20config.yaml=20is=20sole=20source=20of?= =?UTF-8?q?=20truth?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model selection now comes exclusively from config.yaml (set via 'hermes model' or 'hermes setup'). The LLM_MODEL env var is no longer read or written anywhere in production code. Why: env vars are per-process/per-user and would conflict in multi-agent or multi-tenant setups. Config.yaml is file-based and can be scoped per-user or eventually per-session. Changes: - cli.py: Read model from the explicit CLI argument or CLI_CONFIG only, not LLM_MODEL/OPENAI_MODEL - hermes_cli/auth.py: _save_model_choice() no longer writes LLM_MODEL to .env - hermes_cli/setup.py: Remove 12 save_env_value('LLM_MODEL', ...) calls from all provider setup flows - gateway/run.py: Remove LLM_MODEL fallback (HERMES_MODEL still works for gateway process runtime) - cron/scheduler.py: Remove LLM_MODEL fallback (HERMES_MODEL is still read, as in gateway/run.py) - agent/auxiliary_client.py: Remove LLM_MODEL from custom endpoint model detection (OPENAI_MODEL is still read) --- agent/auxiliary_client.py | 2 +- cli.py | 11 ++++++++--- cron/scheduler.py | 2 +- gateway/run.py | 6 +++--- hermes_cli/auth.py | 9 ++++++--- hermes_cli/setup.py | 12 ------------ tests/test_cli_provider_resolution.py | 26 +++++++++++++++++--------- 7 files changed, 36 insertions(+), 32 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 19c2b8bd..1c6ac271 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -443,7 +443,7 @@ def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]: custom_key = os.getenv("OPENAI_API_KEY") if not custom_base or not custom_key: return None, None - model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini" + model = os.getenv("OPENAI_MODEL") or "gpt-4o-mini" 
logger.debug("Auxiliary client: custom endpoint (%s)", model) return OpenAI(api_key=custom_key, base_url=custom_base), model diff --git a/cli.py b/cli.py index 50e5db8d..d62da32f 100755 --- a/cli.py +++ b/cli.py @@ -1129,12 +1129,17 @@ class HermesCLI: self.verbose = verbose if verbose is not None else (self.tool_progress_mode == "verbose") # Configuration - priority: CLI args > env vars > config file - # Model can come from: CLI arg, LLM_MODEL env, OPENAI_MODEL env (custom endpoint), or config - self.model = model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or CLI_CONFIG["model"]["default"] + # Model comes from: CLI arg or config.yaml (single source of truth). + # LLM_MODEL/OPENAI_MODEL env vars are NOT checked — config.yaml is + # authoritative. This avoids conflicts in multi-agent setups where + # env vars would stomp each other. + _model_config = CLI_CONFIG.get("model", {}) + _config_model = _model_config.get("default", "") if isinstance(_model_config, dict) else (_model_config or "") + self.model = model or _config_model or "anthropic/claude-opus-4.6" # Track whether model was explicitly chosen by the user or fell back # to the global default. Provider-specific normalisation may override # the default silently but should warn when overriding an explicit choice. 
- self._model_is_default = not (model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL")) + self._model_is_default = not model self._explicit_api_key = api_key self._explicit_base_url = base_url diff --git a/cron/scheduler.py b/cron/scheduler.py index 348a25c2..c80122ce 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -180,7 +180,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: except UnicodeDecodeError: load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1") - model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" + model = os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6" # Load config.yaml for model, reasoning, prefill, toolsets, provider routing _cfg = {} diff --git a/gateway/run.py b/gateway/run.py index 96d43672..772d4c4f 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1544,7 +1544,7 @@ class GatewayRunner: config_path = _hermes_home / 'config.yaml' # Resolve current model and provider from config - current = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" + current = os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6" current_provider = "openrouter" try: if config_path.exists(): @@ -1999,7 +1999,7 @@ class GatewayRunner: return # Read model from config (same as _run_agent) - model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" + model = os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6" try: import yaml as _y _cfg_path = _hermes_home / "config.yaml" @@ -3093,7 +3093,7 @@ class GatewayRunner: except Exception: pass - model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" + model = os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6" try: import yaml as _y diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 05d233f9..1ffa85bd 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -1671,8 +1671,12 @@ def 
_prompt_model_selection(model_ids: List[str], current_model: str = "") -> Op def _save_model_choice(model_id: str) -> None: - """Save the selected model to config.yaml and .env.""" - from hermes_cli.config import save_config, load_config, save_env_value + """Save the selected model to config.yaml (single source of truth). + + The model is stored in config.yaml only — NOT in .env. This avoids + conflicts in multi-agent setups where env vars would stomp each other. + """ + from hermes_cli.config import save_config, load_config config = load_config() # Always use dict format so provider/base_url can be stored alongside @@ -1681,7 +1685,6 @@ def _save_model_choice(model_id: str) -> None: else: config["model"] = {"default": model_id} save_config(config) - save_env_value("LLM_MODEL", model_id) def login_command(args) -> None: diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 6b00952c..2f48574b 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -681,7 +681,6 @@ def setup_model_provider(config: dict): save_env_value("OPENAI_API_KEY", api_key) if model_name: config['model'] = model_name - save_env_value("LLM_MODEL", model_name) # Save provider and base_url to config.yaml so the gateway and CLI # both resolve the correct provider without relying on env-var heuristics. 
@@ -913,7 +912,6 @@ def setup_model_provider(config: dict): custom = prompt(f" Model name (Enter to keep '{current_model}')") if custom: config['model'] = custom - save_env_value("LLM_MODEL", custom) elif selected_provider == "openai-codex": from hermes_cli.codex_models import get_codex_model_ids codex_models = get_codex_model_ids() @@ -927,12 +925,10 @@ def setup_model_provider(config: dict): model_idx = prompt_choice("Select default model:", model_choices, default_codex) if model_idx < len(codex_models): config['model'] = codex_models[model_idx] - save_env_value("LLM_MODEL", codex_models[model_idx]) elif model_idx == len(codex_models): custom = prompt("Enter model name") if custom: config['model'] = custom - save_env_value("LLM_MODEL", custom) _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) elif selected_provider == "zai": # Coding Plan endpoints don't have GLM-5 @@ -950,12 +946,10 @@ def setup_model_provider(config: dict): if model_idx < len(zai_models): config['model'] = zai_models[model_idx] - save_env_value("LLM_MODEL", zai_models[model_idx]) elif model_idx == len(zai_models): custom = prompt("Enter model name") if custom: config['model'] = custom - save_env_value("LLM_MODEL", custom) # else: keep current elif selected_provider == "kimi-coding": kimi_models = ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"] @@ -968,12 +962,10 @@ def setup_model_provider(config: dict): if model_idx < len(kimi_models): config['model'] = kimi_models[model_idx] - save_env_value("LLM_MODEL", kimi_models[model_idx]) elif model_idx == len(kimi_models): custom = prompt("Enter model name") if custom: config['model'] = custom - save_env_value("LLM_MODEL", custom) # else: keep current elif selected_provider in ("minimax", "minimax-cn"): minimax_models = ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"] @@ -986,12 +978,10 @@ def setup_model_provider(config: dict): if model_idx < len(minimax_models): config['model'] = minimax_models[model_idx] - 
save_env_value("LLM_MODEL", minimax_models[model_idx]) elif model_idx == len(minimax_models): custom = prompt("Enter model name") if custom: config['model'] = custom - save_env_value("LLM_MODEL", custom) # else: keep current else: # Static list for OpenRouter / fallback (from canonical list) @@ -1008,12 +998,10 @@ def setup_model_provider(config: dict): if model_idx < len(ids): config['model'] = ids[model_idx] - save_env_value("LLM_MODEL", ids[model_idx]) elif model_idx == len(ids): # Custom custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)") if custom: config['model'] = custom - save_env_value("LLM_MODEL", custom) # else: Keep current _final_model = config.get('model', '') diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py index f4a446ac..2a3dc43e 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/test_cli_provider_resolution.py @@ -197,21 +197,28 @@ def test_codex_provider_replaces_incompatible_default_model(monkeypatch): assert shell.model == "gpt-5.2-codex" -def test_codex_provider_trusts_explicit_envvar_model(monkeypatch): - """When the user explicitly sets LLM_MODEL, we trust their choice and - let the API be the judge — even if it's a non-OpenAI model. Only - provider prefixes are stripped; the bare model passes through.""" +def test_codex_provider_uses_config_model(monkeypatch): + """Model comes from config.yaml, not LLM_MODEL env var. 
+ Config.yaml is the single source of truth to avoid multi-agent conflicts.""" cli = _import_cli() - monkeypatch.setenv("LLM_MODEL", "claude-opus-4-6") + # LLM_MODEL env var should be IGNORED (even if set) + monkeypatch.setenv("LLM_MODEL", "should-be-ignored") monkeypatch.delenv("OPENAI_MODEL", raising=False) + # Set model via config + monkeypatch.setitem(cli.CLI_CONFIG, "model", { + "default": "gpt-5.2-codex", + "provider": "openai-codex", + "base_url": "https://chatgpt.com/backend-api/codex", + }) + def _runtime_resolve(**kwargs): return { "provider": "openai-codex", "api_mode": "codex_responses", "base_url": "https://chatgpt.com/backend-api/codex", - "api_key": "test-key", + "api_key": "fake-codex-token", "source": "env/config", } @@ -220,11 +227,12 @@ def test_codex_provider_trusts_explicit_envvar_model(monkeypatch): shell = cli.HermesCLI(compact=True, max_turns=1) - assert shell._model_is_default is False assert shell._ensure_runtime_credentials() is True assert shell.provider == "openai-codex" - # User explicitly chose this model — it passes through untouched - assert shell.model == "claude-opus-4-6" + # Model from config (may be normalized by codex provider logic) + assert "codex" in shell.model.lower() + # LLM_MODEL env var is NOT used + assert shell.model != "should-be-ignored" def test_codex_provider_preserves_explicit_codex_model(monkeypatch):