fix(compression): remove hardcoded gemini-3-flash-preview as default summary model

Closes #2453. The DEFAULT_CONFIG hardcoded google/gemini-3-flash-preview as the summary_model for context compression. This caused unexpected OpenRouter charges for users who had configured a different provider/model, because the compression task would silently fall back to Gemini via OpenRouter even when the user's main model was on a different provider. Fix: change the summary_model default to an empty string. When it is empty, call_llm() resolves the model through the standard auto-detection chain (auxiliary.compression config -> env vars -> main provider), which correctly uses the user's configured provider and model. Users who want a dedicated cheap model for compression can still explicitly set compression.summary_model in their config.yaml.
2026-03-22 11:20:27 +00:00
parent 0962cbb2e5
commit 0698ddb496
2 changed files with 4 additions and 3 deletions
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -159,7 +159,7 @@ DEFAULT_CONFIG = {
    "compression": {
        "enabled": True,
        "threshold": 0.50,
-        "summary_model": "google/gemini-3-flash-preview",
+        "summary_model": "",  # empty = use main configured model
        "summary_provider": "auto",
        "summary_base_url": None,
    },
@@ -1659,7 +1659,8 @@ def show_config():
    print(f"  Enabled:      {'yes' if enabled else 'no'}")
    if enabled:
        print(f"  Threshold:    {compression.get('threshold', 0.50) * 100:.0f}%")
-        print(f"  Model:        {compression.get('summary_model', 'google/gemini-3-flash-preview')}")
+        _sm = compression.get('summary_model', '') or '(main model)'
+        print(f"  Model:        {_sm}")
        comp_provider = compression.get('summary_provider', 'auto')
        if comp_provider != 'auto':
            print(f"  Provider:     {comp_provider}")