Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
2c8848b2f1 fix: context pressure warning fires at 85% of context_length, not threshold
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 51s
The _emit_context_pressure warning was firing at 85% of threshold_tokens
(the configurable compaction threshold), not 85% of context_length.
This caused it to fire at 42.5% of actual context when threshold_percent=0.50,
confusing users who expected alignment with the poka-yoke WARNING signal.

Fix: trigger the warning when token usage reaches 85% of context_length
(0.85 * context_length) instead of 85% of threshold_tokens. The
compaction_progress value passed to _emit_context_pressure is still computed
from threshold_tokens for the display bar, but the trigger condition
now matches the poka-yoke signal.

Closes #538
2026-04-13 22:24:40 -04:00
2 changed files with 10 additions and 59 deletions

View File

@@ -13,7 +13,6 @@ import concurrent.futures
import json
import logging
import os
import re
import subprocess
import sys
@@ -644,56 +643,7 @@ def _build_job_prompt(job: dict) -> str:
return "\n".join(parts)
# Prompt phrases that assume a locally reachable service (Ollama, localhost
# ports) and therefore fail on cloud endpoints. Each entry is a
# (compiled pattern, replacement text) pair; every pattern is compiled
# case-insensitively.
_CLOUD_INCOMPATIBLE_PATTERNS = [
    (re.compile(regex, re.IGNORECASE), replacement)
    for regex, replacement in (
        (
            r"\b[Cc]heck\s+(?:that\s+)?[Oo]llama\s+(?:is\s+)?(?:responding|running|up|available)",
            "Verify system services are healthy using available tools",
        ),
        (
            r"\b[Vv]erify\s+(?:that\s+)?[Oo]llama\s+(?:is\s+)?(?:responding|running|up)",
            "Verify system services are healthy using available tools",
        ),
        (
            r"\bcurl\s+localhost:\d+",
            "use available tools to check service health",
        ),
        (
            r"\bcurl\s+127\.0\.0\.1:\d+",
            "use available tools to check service health",
        ),
        (
            r"\bpoll\s+localhost",
            "check service health via available tools",
        ),
    )
]
def _rewrite_cloud_incompatible_prompt(prompt: str, base_url: str) -> str:
    """Rewrite prompt instructions that assume local service access.

    When a cron job runs on a cloud inference endpoint (Nous, OpenRouter,
    Anthropic), instructions to "Check Ollama" or "curl localhost:11434"
    are impossible. Instead of just warning, this rewrites each matching
    instruction to a cloud-compatible equivalent that the agent can
    actually execute.

    Args:
        prompt: The job prompt text to inspect.
        base_url: Endpoint URL handed to ``is_local_endpoint``; a falsy
            value is passed on as an empty string.

    Returns:
        ``prompt`` unchanged when ``agent.model_metadata`` cannot be
        imported, when the endpoint is reported as local, or when no
        pattern matched; otherwise the rewritten prompt prefixed with a
        one-line note explaining that instructions were adjusted.
    """
    try:
        from agent.model_metadata import is_local_endpoint
    except ImportError:
        # Without the helper the endpoint cannot be classified; leave the
        # prompt untouched rather than guess.
        return prompt

    if is_local_endpoint(base_url or ""):
        return prompt  # Local — no rewrite needed

    rewritten = prompt
    for pattern, replacement in _CLOUD_INCOMPATIBLE_PATTERNS:
        rewritten = pattern.sub(replacement, rewritten)

    if rewritten != prompt:
        # The line break after the note is written as an escape: a raw
        # newline inside a double-quoted literal is a SyntaxError.
        rewritten = (
            "[NOTE: Some instructions were adjusted for cloud execution. "
            "Local service checks were rewritten to use available tools.]\n"
            + rewritten
        )
    return rewritten
def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:(job: dict) -> tuple[bool, str, str, Optional[str]]:
def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
"""
Execute a single cron job.

View File

@@ -6005,9 +6005,9 @@ class AIAgent:
# can't reduce enough (e.g. threshold is very low, or system prompt
# alone exceeds the warning level), keep the flag set to prevent
# spamming the user with repeated warnings every loop iteration.
if self.context_compressor.threshold_tokens > 0:
_post_progress = _compressed_est / self.context_compressor.threshold_tokens
if _post_progress < 0.85:
if self.context_compressor.context_length > 0:
_context_fill = _compressed_est / self.context_compressor.context_length
if _context_fill < 0.85:
self._context_pressure_warned = False
# Clear the file-read dedup cache. After compression the original
@@ -8991,14 +8991,15 @@ class AIAgent:
# ── Context pressure warnings (user-facing only) ──────────
# Notify the user (NOT the LLM) as context approaches the
# compaction threshold. Thresholds are relative to where
# compaction fires, not the raw context window.
# compaction threshold. Fires at 85% of context_length so
# it aligns with the poka-yoke WARNING signal (#538).
# Does not inject into messages — just prints to CLI output
# and fires status_callback for gateway platforms.
if _compressor.threshold_tokens > 0:
_compaction_progress = _real_tokens / _compressor.threshold_tokens
if _compaction_progress >= 0.85 and not self._context_pressure_warned:
if _compressor.context_length > 0:
_context_fill = _real_tokens / _compressor.context_length
if _context_fill >= 0.85 and not self._context_pressure_warned:
self._context_pressure_warned = True
_compaction_progress = _real_tokens / _compressor.threshold_tokens if _compressor.threshold_tokens > 0 else 1.0
self._emit_context_pressure(_compaction_progress, _compressor)
if self.compression_enabled and _compressor.should_compress(_real_tokens):