From 9302690e1b71c1abfc2496640f0a8c3a68709d35 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 11 Mar 2026 22:04:42 -0700
Subject: [PATCH] =?UTF-8?q?refactor:=20remove=20LLM=5FMODEL=20env=20var=20?=
 =?UTF-8?q?dependency=20=E2=80=94=20config.yaml=20is=20sole=20source=20of?=
 =?UTF-8?q?=20truth?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Model selection now comes exclusively from config.yaml (set via
'hermes model' or 'hermes setup'). The LLM_MODEL env var is no longer
read or written anywhere in production code.

Why: env vars are per-process/per-user and would conflict in
multi-agent or multi-tenant setups. config.yaml is file-based and
can be scoped per-user or eventually per-session.

Changes:
- cli.py: Read model from the CLI arg or CLI_CONFIG only, not the
  LLM_MODEL/OPENAI_MODEL env vars
- hermes_cli/auth.py: _save_model_choice() no longer writes LLM_MODEL
  to .env
- hermes_cli/setup.py: Remove 12 save_env_value('LLM_MODEL', ...)
  calls from all provider setup flows
- gateway/run.py: Remove LLM_MODEL fallback (HERMES_MODEL still works
  for gateway process runtime)
- cron/scheduler.py: Remove LLM_MODEL fallback (HERMES_MODEL still
  works)
- agent/auxiliary_client.py: Remove LLM_MODEL from custom endpoint
  model detection
---
 agent/auxiliary_client.py             |  2 +-
 cli.py                                | 11 ++++++++---
 cron/scheduler.py                     |  2 +-
 gateway/run.py                        |  6 +++---
 hermes_cli/auth.py                    |  9 ++++++---
 hermes_cli/setup.py                   | 12 ------------
 tests/test_cli_provider_resolution.py | 26 +++++++++++++++++---------
 7 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 19c2b8bd..1c6ac271 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -443,7 +443,7 @@ def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
     custom_key = os.getenv("OPENAI_API_KEY")
     if not custom_base or not custom_key:
         return None, None
-    model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini"
+    model = os.getenv("OPENAI_MODEL") or "gpt-4o-mini"
     logger.debug("Auxiliary client: custom endpoint (%s)", model)
     return OpenAI(api_key=custom_key, base_url=custom_base), model
 
diff --git a/cli.py b/cli.py
index 50e5db8d..d62da32f 100755
--- a/cli.py
+++ b/cli.py
@@ -1129,12 +1129,17 @@ class HermesCLI:
         self.verbose = verbose if verbose is not None else (self.tool_progress_mode == "verbose")
         
         # Configuration - priority: CLI args > env vars > config file
-        # Model can come from: CLI arg, LLM_MODEL env, OPENAI_MODEL env (custom endpoint), or config
-        self.model = model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or CLI_CONFIG["model"]["default"]
+        # Model comes from: CLI arg or config.yaml (single source of truth).
+        # LLM_MODEL/OPENAI_MODEL env vars are NOT checked — config.yaml is
+        # authoritative.  This avoids conflicts in multi-agent setups where
+        # env vars would stomp each other.
+        _model_config = CLI_CONFIG.get("model", {})
+        _config_model = _model_config.get("default", "") if isinstance(_model_config, dict) else (_model_config or "")
+        self.model = model or _config_model or "anthropic/claude-opus-4.6"
         # Track whether model was explicitly chosen by the user or fell back
         # to the global default.  Provider-specific normalisation may override
         # the default silently but should warn when overriding an explicit choice.
-        self._model_is_default = not (model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL"))
+        self._model_is_default = not model
 
         self._explicit_api_key = api_key
         self._explicit_base_url = base_url
diff --git a/cron/scheduler.py b/cron/scheduler.py
index 348a25c2..c80122ce 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -180,7 +180,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
         except UnicodeDecodeError:
             load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1")
 
-        model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
+        model = os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
 
         # Load config.yaml for model, reasoning, prefill, toolsets, provider routing
         _cfg = {}
diff --git a/gateway/run.py b/gateway/run.py
index 96d43672..772d4c4f 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -1544,7 +1544,7 @@ class GatewayRunner:
         config_path = _hermes_home / 'config.yaml'
 
         # Resolve current model and provider from config
-        current = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
+        current = os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
         current_provider = "openrouter"
         try:
             if config_path.exists():
@@ -1999,7 +1999,7 @@ class GatewayRunner:
                 return
 
             # Read model from config (same as _run_agent)
-            model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
+            model = os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
             try:
                 import yaml as _y
                 _cfg_path = _hermes_home / "config.yaml"
@@ -3093,7 +3093,7 @@ class GatewayRunner:
             except Exception:
                 pass
 
-            model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
+            model = os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
 
             try:
                 import yaml as _y
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 05d233f9..1ffa85bd 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -1671,8 +1671,12 @@ def _prompt_model_selection(model_ids: List[str], current_model: str = "") -> Op
 
 
 def _save_model_choice(model_id: str) -> None:
-    """Save the selected model to config.yaml and .env."""
-    from hermes_cli.config import save_config, load_config, save_env_value
+    """Save the selected model to config.yaml (single source of truth).
+
+    The model is stored in config.yaml only — NOT in .env.  This avoids
+    conflicts in multi-agent setups where env vars would stomp each other.
+    """
+    from hermes_cli.config import save_config, load_config
 
     config = load_config()
     # Always use dict format so provider/base_url can be stored alongside
@@ -1681,7 +1685,6 @@ def _save_model_choice(model_id: str) -> None:
     else:
         config["model"] = {"default": model_id}
     save_config(config)
-    save_env_value("LLM_MODEL", model_id)
 
 
 def login_command(args) -> None:
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 6b00952c..2f48574b 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -681,7 +681,6 @@ def setup_model_provider(config: dict):
             save_env_value("OPENAI_API_KEY", api_key)
         if model_name:
             config['model'] = model_name
-            save_env_value("LLM_MODEL", model_name)
 
         # Save provider and base_url to config.yaml so the gateway and CLI
         # both resolve the correct provider without relying on env-var heuristics.
@@ -913,7 +912,6 @@ def setup_model_provider(config: dict):
             custom = prompt(f"  Model name (Enter to keep '{current_model}')")
             if custom:
                 config['model'] = custom
-                save_env_value("LLM_MODEL", custom)
         elif selected_provider == "openai-codex":
             from hermes_cli.codex_models import get_codex_model_ids
             codex_models = get_codex_model_ids()
@@ -927,12 +925,10 @@ def setup_model_provider(config: dict):
             model_idx = prompt_choice("Select default model:", model_choices, default_codex)
             if model_idx < len(codex_models):
                 config['model'] = codex_models[model_idx]
-                save_env_value("LLM_MODEL", codex_models[model_idx])
             elif model_idx == len(codex_models):
                 custom = prompt("Enter model name")
                 if custom:
                     config['model'] = custom
-                    save_env_value("LLM_MODEL", custom)
             _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
         elif selected_provider == "zai":
             # Coding Plan endpoints don't have GLM-5
@@ -950,12 +946,10 @@ def setup_model_provider(config: dict):
 
             if model_idx < len(zai_models):
                 config['model'] = zai_models[model_idx]
-                save_env_value("LLM_MODEL", zai_models[model_idx])
             elif model_idx == len(zai_models):
                 custom = prompt("Enter model name")
                 if custom:
                     config['model'] = custom
-                    save_env_value("LLM_MODEL", custom)
             # else: keep current
         elif selected_provider == "kimi-coding":
             kimi_models = ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"]
@@ -968,12 +962,10 @@ def setup_model_provider(config: dict):
 
             if model_idx < len(kimi_models):
                 config['model'] = kimi_models[model_idx]
-                save_env_value("LLM_MODEL", kimi_models[model_idx])
             elif model_idx == len(kimi_models):
                 custom = prompt("Enter model name")
                 if custom:
                     config['model'] = custom
-                    save_env_value("LLM_MODEL", custom)
             # else: keep current
         elif selected_provider in ("minimax", "minimax-cn"):
             minimax_models = ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]
@@ -986,12 +978,10 @@ def setup_model_provider(config: dict):
 
             if model_idx < len(minimax_models):
                 config['model'] = minimax_models[model_idx]
-                save_env_value("LLM_MODEL", minimax_models[model_idx])
             elif model_idx == len(minimax_models):
                 custom = prompt("Enter model name")
                 if custom:
                     config['model'] = custom
-                    save_env_value("LLM_MODEL", custom)
             # else: keep current
         else:
             # Static list for OpenRouter / fallback (from canonical list)
@@ -1008,12 +998,10 @@ def setup_model_provider(config: dict):
 
             if model_idx < len(ids):
                 config['model'] = ids[model_idx]
-                save_env_value("LLM_MODEL", ids[model_idx])
             elif model_idx == len(ids):  # Custom
                 custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)")
                 if custom:
                     config['model'] = custom
-                    save_env_value("LLM_MODEL", custom)
             # else: Keep current
 
         _final_model = config.get('model', '')
diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py
index f4a446ac..2a3dc43e 100644
--- a/tests/test_cli_provider_resolution.py
+++ b/tests/test_cli_provider_resolution.py
@@ -197,21 +197,28 @@ def test_codex_provider_replaces_incompatible_default_model(monkeypatch):
     assert shell.model == "gpt-5.2-codex"
 
 
-def test_codex_provider_trusts_explicit_envvar_model(monkeypatch):
-    """When the user explicitly sets LLM_MODEL, we trust their choice and
-    let the API be the judge — even if it's a non-OpenAI model.  Only
-    provider prefixes are stripped; the bare model passes through."""
+def test_codex_provider_uses_config_model(monkeypatch):
+    """Model comes from config.yaml, not LLM_MODEL env var.
+    Config.yaml is the single source of truth to avoid multi-agent conflicts."""
     cli = _import_cli()
 
-    monkeypatch.setenv("LLM_MODEL", "claude-opus-4-6")
+    # LLM_MODEL env var should be IGNORED (even if set)
+    monkeypatch.setenv("LLM_MODEL", "should-be-ignored")
     monkeypatch.delenv("OPENAI_MODEL", raising=False)
 
+    # Set model via config
+    monkeypatch.setitem(cli.CLI_CONFIG, "model", {
+        "default": "gpt-5.2-codex",
+        "provider": "openai-codex",
+        "base_url": "https://chatgpt.com/backend-api/codex",
+    })
+
     def _runtime_resolve(**kwargs):
         return {
             "provider": "openai-codex",
             "api_mode": "codex_responses",
             "base_url": "https://chatgpt.com/backend-api/codex",
-            "api_key": "test-key",
+            "api_key": "fake-codex-token",
             "source": "env/config",
         }
 
@@ -220,11 +227,12 @@ def test_codex_provider_trusts_explicit_envvar_model(monkeypatch):
 
     shell = cli.HermesCLI(compact=True, max_turns=1)
 
-    assert shell._model_is_default is False
     assert shell._ensure_runtime_credentials() is True
     assert shell.provider == "openai-codex"
-    # User explicitly chose this model — it passes through untouched
-    assert shell.model == "claude-opus-4-6"
+    # Model from config (may be normalized by codex provider logic)
+    assert "codex" in shell.model.lower()
+    # LLM_MODEL env var is NOT used
+    assert shell.model != "should-be-ignored"
 
 
 def test_codex_provider_preserves_explicit_codex_model(monkeypatch):