diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 52428d954..4ec8b3cf3 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -40,7 +40,7 @@ _MIN_SUMMARY_TOKENS = 2000
 # Proportion of compressed content to allocate for summary
 _SUMMARY_RATIO = 0.20
 # Absolute ceiling for summary tokens (even on very large context windows)
-_SUMMARY_TOKENS_CEILING = 32_000
+_SUMMARY_TOKENS_CEILING = 12_000
 
 # Placeholder used when pruning old tool results
 _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"
@@ -63,10 +63,10 @@ class ContextCompressor:
     def __init__(
         self,
         model: str,
-        threshold_percent: float = 0.80,
+        threshold_percent: float = 0.50,
         protect_first_n: int = 3,
         protect_last_n: int = 20,
-        summary_target_ratio: float = 0.40,
+        summary_target_ratio: float = 0.20,
         quiet_mode: bool = False,
         summary_model_override: str = None,
         base_url: str = "",
@@ -92,8 +92,8 @@ class ContextCompressor:
         self.threshold_tokens = int(self.context_length * threshold_percent)
         self.compression_count = 0
 
-        # Derive token budgets from the target ratio and context length
-        target_tokens = int(self.context_length * self.summary_target_ratio)
+        # Derive the tail budget: the ratio is relative to the threshold, not total context
+        target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
         self.tail_token_budget = target_tokens
         self.max_summary_tokens = min(
             int(self.context_length * 0.05), _SUMMARY_TOKENS_CEILING,
diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 4469fa54b..dc1a33199 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -236,23 +236,24 @@ browser:
 # 5. Summarizes middle turns using a fast/cheap model
 # 6. Inserts summary as a user message, continues conversation seamlessly
 #
-# Post-compression size scales with the model's context window via target_ratio:
-#   MiniMax 200K context → ~80K post-compression (at 0.40 ratio)
-#   GPT-5   1M   context → ~400K post-compression (at 0.40 ratio)
+# Post-compression tail budget is target_ratio × threshold × context_length:
+#   200K context, threshold 0.50, ratio 0.20 → 20K tokens of recent tail preserved
+#   1M   context, threshold 0.50, ratio 0.20 → 100K tokens of recent tail preserved
 #
 compression:
   # Enable automatic context compression (default: true)
   # Set to false if you prefer to manage context manually or want errors on overflow
   enabled: true
   
-  # Trigger compression at this % of model's context limit (default: 0.80 = 80%)
+  # Trigger compression at this fraction of the model's context limit (default: 0.50 = 50%)
   # Lower values = more aggressive compression, higher values = compress later
-  threshold: 0.80
+  threshold: 0.50
   
-  # Target post-compression size as a fraction of context window (default: 0.40 = 40%)
-  # Controls how much context survives compression. Tail token budget and summary
-  # cap scale with this value. Range: 0.10 - 0.80
-  target_ratio: 0.40
+  # Fraction of the threshold to preserve as recent tail (default: 0.20 = 20%)
+  # e.g. 20% of 50% threshold = 10% of total context kept as recent messages.
+  # Summary output is separately capped at 5% of context, max 12K tokens (Gemini output limit).
+  # Range: 0.10 - 0.80
+  target_ratio: 0.20
 
   # Number of most-recent messages to always preserve (default: 20 ≈ 10 full turns)
   # Higher values keep more recent conversation intact at the cost of more aggressive
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index f96324fd8..000d8626b 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -163,8 +163,8 @@ DEFAULT_CONFIG = {
     
     "compression": {
         "enabled": True,
-        "threshold": 0.80,            # compress when context usage exceeds this ratio
-        "target_ratio": 0.40,         # fraction of context to preserve as recent tail
+        "threshold": 0.50,            # compress when context usage exceeds this ratio
+        "target_ratio": 0.20,         # fraction of threshold to preserve as recent tail
         "protect_last_n": 20,         # minimum recent messages to keep uncompressed
         "summary_model": "",          # empty = use main configured model
         "summary_provider": "auto",
@@ -1686,8 +1686,8 @@ def show_config():
     enabled = compression.get('enabled', True)
     print(f"  Enabled:      {'yes' if enabled else 'no'}")
     if enabled:
-        print(f"  Threshold:    {compression.get('threshold', 0.80) * 100:.0f}%")
-        print(f"  Target ratio: {compression.get('target_ratio', 0.40) * 100:.0f}% of context preserved")
+        print(f"  Threshold:    {compression.get('threshold', 0.50) * 100:.0f}%")
+        print(f"  Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved")
         print(f"  Protect last: {compression.get('protect_last_n', 20)} messages")
         _sm = compression.get('summary_model', '') or '(main model)'
         print(f"  Model:        {_sm}")
diff --git a/run_agent.py b/run_agent.py
index 11d728e83..25cfd3674 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1009,10 +1009,10 @@ class AIAgent:
         _compression_cfg = _agent_cfg.get("compression", {})
         if not isinstance(_compression_cfg, dict):
             _compression_cfg = {}
-        compression_threshold = float(_compression_cfg.get("threshold", 0.80))
+        compression_threshold = float(_compression_cfg.get("threshold", 0.50))
         compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes")
         compression_summary_model = _compression_cfg.get("summary_model") or None
-        compression_target_ratio = float(_compression_cfg.get("target_ratio", 0.40))
+        compression_target_ratio = float(_compression_cfg.get("target_ratio", 0.20))
         compression_protect_last = int(_compression_cfg.get("protect_last_n", 20))
 
         # Read explicit context_length override from model config
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index 380dc8a42..45c832dfc 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -519,24 +519,26 @@ class TestSummaryTargetRatio:
     """Verify that summary_target_ratio properly scales budgets with context window."""
 
     def test_tail_budget_scales_with_context(self):
-        """Tail token budget should be context_length * summary_target_ratio."""
+        """Tail token budget should be threshold_tokens * summary_target_ratio."""
         with patch("agent.context_compressor.get_model_context_length", return_value=200_000):
             c = ContextCompressor(model="test", quiet_mode=True, summary_target_ratio=0.40)
-        assert c.tail_token_budget == 80_000
+        # 200K * 0.50 threshold * 0.40 ratio = 40K
+        assert c.tail_token_budget == 40_000
 
         with patch("agent.context_compressor.get_model_context_length", return_value=1_000_000):
             c = ContextCompressor(model="test", quiet_mode=True, summary_target_ratio=0.40)
-        assert c.tail_token_budget == 400_000
+        # 1M * 0.50 threshold * 0.40 ratio = 200K
+        assert c.tail_token_budget == 200_000
 
     def test_summary_cap_scales_with_context(self):
-        """Max summary tokens should be 5% of context, capped at 32K."""
+        """Max summary tokens should be 5% of context, capped at 12K."""
         with patch("agent.context_compressor.get_model_context_length", return_value=200_000):
             c = ContextCompressor(model="test", quiet_mode=True)
         assert c.max_summary_tokens == 10_000  # 200K * 0.05
 
         with patch("agent.context_compressor.get_model_context_length", return_value=1_000_000):
             c = ContextCompressor(model="test", quiet_mode=True)
-        assert c.max_summary_tokens == 32_000  # capped at ceiling
+        assert c.max_summary_tokens == 12_000  # capped at 12K ceiling
 
     def test_ratio_clamped(self):
         """Ratio should be clamped to [0.10, 0.80]."""
@@ -548,12 +550,12 @@ class TestSummaryTargetRatio:
             c = ContextCompressor(model="test", quiet_mode=True, summary_target_ratio=0.95)
         assert c.summary_target_ratio == 0.80
 
-    def test_default_threshold_is_80_percent(self):
-        """Default compression threshold should be 80%."""
+    def test_default_threshold_is_50_percent(self):
+        """Default compression threshold should be 50%."""
         with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
             c = ContextCompressor(model="test", quiet_mode=True)
-        assert c.threshold_percent == 0.80
-        assert c.threshold_tokens == 80_000
+        assert c.threshold_percent == 0.50
+        assert c.threshold_tokens == 50_000
 
     def test_default_protect_last_n_is_20(self):
         """Default protect_last_n should be 20."""