refactor(gateway): remove broken 1.4x hygiene multiplier entirely
The previous commit capped the 1.4x at 95% of context, but the multiplier itself is unnecessary and confusing: 85% threshold × 1.4 = 119% of context → never fires; 95% warn × 1.4 = 133% of context → never warns. The 85% hygiene threshold already provides ample headroom over the agent's own 50% compressor. Even if rough estimates overestimate by 50%, hygiene would fire at ~57% actual usage — safe and harmless. Remove the multiplier entirely. Both the actual and estimated token paths now use the same 85% / 95% thresholds. Update tests and comments accordingly.
This commit is contained in:
@@ -212,71 +212,59 @@ class TestSessionHygieneWarnThreshold:
|
||||
assert post_compress_tokens < warn_threshold
|
||||
|
||||
|
||||
class TestEstimatedTokenSafetyCap:
|
||||
"""Verify the 1.4x safety factor on rough estimates is capped at 95% of
|
||||
context length, preventing the threshold from exceeding the model's
|
||||
actual limit.
|
||||
class TestEstimatedTokenThreshold:
|
||||
"""Verify that hygiene thresholds are always below the model's context
|
||||
limit — for both actual and estimated token counts.
|
||||
|
||||
Bug: For ~200K models (GLM-5-turbo), the uncapped 1.4x pushed the
|
||||
threshold to 238K — above the model's limit — so hygiene never fired.
|
||||
Regression: a previous 1.4x multiplier on rough estimates pushed the
|
||||
threshold to 85% * 1.4 = 119% of context, which exceeded the model's
|
||||
limit and prevented hygiene from ever firing for ~200K models (GLM-5).
|
||||
The fix removed the multiplier entirely — the 85% threshold already
|
||||
provides ample headroom over the agent's 50% compressor.
|
||||
"""
|
||||
|
||||
def test_uncapped_14x_would_exceed_context(self):
|
||||
"""Without the cap, 200K * 0.85 * 1.4 = 238K > 200K (broken)."""
|
||||
def test_threshold_below_context_for_200k_model(self):
|
||||
"""Hygiene threshold must always be below model context."""
|
||||
context_length = 200_000
|
||||
threshold_pct = 0.85
|
||||
raw_threshold = int(context_length * threshold_pct) # 170K
|
||||
uncapped = int(raw_threshold * 1.4) # 238K
|
||||
assert uncapped > context_length, (
|
||||
"Uncapped 1.4x should exceed model context (this is the bug)"
|
||||
)
|
||||
threshold = int(context_length * 0.85)
|
||||
assert threshold < context_length
|
||||
|
||||
def test_capped_14x_stays_within_context(self):
|
||||
"""With the cap, the threshold stays at 95% of context length."""
|
||||
context_length = 200_000
|
||||
threshold_pct = 0.85
|
||||
raw_threshold = int(context_length * threshold_pct) # 170K
|
||||
max_safe = int(context_length * 0.95) # 190K
|
||||
capped = min(int(raw_threshold * 1.4), max_safe)
|
||||
assert capped <= context_length, (
|
||||
f"Capped threshold ({capped:,}) must not exceed context ({context_length:,})"
|
||||
)
|
||||
assert capped == max_safe, (
|
||||
f"For 200K models, the cap should bind: expected {max_safe:,}, got {capped:,}"
|
||||
)
|
||||
|
||||
def test_cap_does_not_affect_large_context_models(self):
|
||||
"""For 1M+ models the 1.4x factor stays below 95%, so cap is no-op."""
|
||||
context_length = 1_000_000
|
||||
threshold_pct = 0.85
|
||||
raw_threshold = int(context_length * threshold_pct) # 850K
|
||||
max_safe = int(context_length * 0.95) # 950K
|
||||
uncapped = int(raw_threshold * 1.4) # 1,190K — but that's > 950K
|
||||
capped = min(uncapped, max_safe)
|
||||
# For very large models the cap still applies but the resulting
|
||||
# threshold (950K) is still large enough to prevent premature compression
|
||||
assert capped <= context_length
|
||||
|
||||
def test_cap_for_128k_model(self):
|
||||
"""128K model: 128K * 0.85 * 1.4 = 152K — exceeds 128K, cap binds."""
|
||||
def test_threshold_below_context_for_128k_model(self):
|
||||
context_length = 128_000
|
||||
threshold_pct = 0.85
|
||||
raw_threshold = int(context_length * threshold_pct) # 108,800
|
||||
max_safe = int(context_length * 0.95) # 121,600
|
||||
uncapped = int(raw_threshold * 1.4) # 152,320
|
||||
capped = min(uncapped, max_safe)
|
||||
assert uncapped > context_length, "1.4x exceeds 128K context"
|
||||
assert capped == max_safe, "Cap should bind for 128K models"
|
||||
assert capped < context_length, "Capped value must be below context limit"
|
||||
threshold = int(context_length * 0.85)
|
||||
assert threshold < context_length
|
||||
|
||||
def test_warn_threshold_capped_at_context_length(self):
|
||||
"""Warn threshold (0.95 * 1.4) must be capped at context_length."""
|
||||
def test_no_multiplier_means_same_threshold_for_estimated_and_actual(self):
|
||||
"""Without the 1.4x, estimated and actual token paths use the same threshold."""
|
||||
context_length = 200_000
|
||||
raw_warn = int(context_length * 0.95) # 190K
|
||||
uncapped_warn = int(raw_warn * 1.4) # 266K
|
||||
capped_warn = min(uncapped_warn, context_length)
|
||||
assert uncapped_warn > context_length
|
||||
assert capped_warn == context_length
|
||||
threshold_pct = 0.85
|
||||
threshold = int(context_length * threshold_pct)
|
||||
# Both paths should use 170K — no inflation
|
||||
assert threshold == 170_000
|
||||
|
||||
def test_warn_threshold_below_context(self):
|
||||
"""Warn threshold (95%) must be below context length."""
|
||||
for ctx in (128_000, 200_000, 1_000_000):
|
||||
warn = int(ctx * 0.95)
|
||||
assert warn < ctx
|
||||
|
||||
def test_overestimate_fires_early_but_safely(self):
|
||||
"""If rough estimate is 50% inflated, hygiene fires at ~57% actual usage.
|
||||
|
||||
That's between the agent's 50% threshold and the model's limit —
|
||||
safe and harmless.
|
||||
"""
|
||||
context_length = 200_000
|
||||
threshold = int(context_length * 0.85) # 170K
|
||||
# If actual tokens = 113K, rough estimate = 113K * 1.5 = 170K
|
||||
# Hygiene fires when estimate hits 170K, actual is ~113K = 57% of ctx
|
||||
actual_when_fires = threshold / 1.5
|
||||
assert actual_when_fires > context_length * 0.50, (
|
||||
"Early fire should still be above agent's 50% threshold"
|
||||
)
|
||||
assert actual_when_fires < context_length, (
|
||||
"Early fire must be well below model limit"
|
||||
)
|
||||
|
||||
|
||||
class TestTokenEstimation:
|
||||
|
||||
Reference in New Issue
Block a user