Compare commits
1 Commits
queue/378-
...
fix/538-co
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2c8848b2f1 |
@@ -13,7 +13,6 @@ import concurrent.futures
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
@@ -644,56 +643,7 @@ def _build_job_prompt(job: dict) -> str:
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
# Rewrite rules for prompt instructions that reference local-only services
# and therefore cannot be carried out when the job runs on a cloud endpoint.
#
# Each entry is ``(compiled_pattern, replacement_text)``; consumers apply the
# rules in list order with ``pattern.sub(replacement, text)``. Every pattern
# is compiled with ``re.IGNORECASE``, so the literals are written lowercase.
#
# The curl rules accept an optional ``http://`` / ``https://`` scheme and also
# consume a trailing URL path (up to whitespace, a quote, a backtick or a
# closing parenthesis) so the rewrite does not leave a dangling fragment such
# as "/api/tags" glued to the replacement text.
_CLOUD_INCOMPATIBLE_PATTERNS = [
    (
        re.compile(
            r"\bcheck\s+(?:that\s+)?ollama\s+(?:is\s+)?(?:responding|running|up|available)",
            re.IGNORECASE,
        ),
        "Verify system services are healthy using available tools",
    ),
    (
        re.compile(
            r"\bverify\s+(?:that\s+)?ollama\s+(?:is\s+)?(?:responding|running|up)",
            re.IGNORECASE,
        ),
        "Verify system services are healthy using available tools",
    ),
    (
        re.compile(
            r"\bcurl\s+(?:https?://)?localhost:\d+(?:/[^\s'\"`)]*)?",
            re.IGNORECASE,
        ),
        "use available tools to check service health",
    ),
    (
        re.compile(
            r"\bcurl\s+(?:https?://)?127\.0\.0\.1:\d+(?:/[^\s'\"`)]*)?",
            re.IGNORECASE,
        ),
        "use available tools to check service health",
    ),
    (
        re.compile(r"\bpoll\s+localhost", re.IGNORECASE),
        "check service health via available tools",
    ),
]
|
||||
|
||||
|
||||
def _rewrite_cloud_incompatible_prompt(prompt: str, base_url: str) -> str:
    """Rewrite prompt instructions that assume local service access.

    When a cron job runs on a cloud inference endpoint (Nous, OpenRouter,
    Anthropic), instructions such as "Check Ollama" or "curl localhost:11434"
    are impossible to carry out. Instead of just warning, this substitutes
    each match of ``_CLOUD_INCOMPATIBLE_PATTERNS`` with a cloud-compatible
    equivalent that the agent can actually execute.

    Args:
        prompt: The job prompt text to inspect.
        base_url: Endpoint URL passed to ``is_local_endpoint``; a falsy
            value is passed through as the empty string.

    Returns:
        The original prompt, unchanged, when ``agent.model_metadata`` cannot
        be imported, when the endpoint is reported as local, or when no
        pattern matched. Otherwise the rewritten prompt, prefixed with a
        bracketed NOTE line and a blank line.
    """
    try:
        from agent.model_metadata import is_local_endpoint
    except ImportError:
        # Best-effort: without the endpoint classifier we cannot tell local
        # from cloud, so leave the prompt untouched.
        return prompt

    if is_local_endpoint(base_url or ""):
        return prompt  # Local — no rewrite needed

    rewritten = prompt
    for pattern, replacement in _CLOUD_INCOMPATIBLE_PATTERNS:
        rewritten = pattern.sub(replacement, rewritten)

    if rewritten == prompt:
        # Nothing matched; do not add the NOTE banner.
        return prompt

    # The banner is separated from the prompt by one blank line ("\n\n").
    return (
        "[NOTE: Some instructions were adjusted for cloud execution. "
        "Local service checks were rewritten to use available tools.]\n\n"
        + rewritten
    )
|
||||
|
||||
|
||||
def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
"""
|
||||
Execute a single cron job.
|
||||
|
||||
|
||||
17
run_agent.py
17
run_agent.py
@@ -6005,9 +6005,9 @@ class AIAgent:
|
||||
# can't reduce enough (e.g. threshold is very low, or system prompt
|
||||
# alone exceeds the warning level), keep the flag set to prevent
|
||||
# spamming the user with repeated warnings every loop iteration.
|
||||
if self.context_compressor.threshold_tokens > 0:
|
||||
_post_progress = _compressed_est / self.context_compressor.threshold_tokens
|
||||
if _post_progress < 0.85:
|
||||
if self.context_compressor.context_length > 0:
|
||||
_context_fill = _compressed_est / self.context_compressor.context_length
|
||||
if _context_fill < 0.85:
|
||||
self._context_pressure_warned = False
|
||||
|
||||
# Clear the file-read dedup cache. After compression the original
|
||||
@@ -8991,14 +8991,15 @@ class AIAgent:
|
||||
|
||||
# ── Context pressure warnings (user-facing only) ──────────
|
||||
# Notify the user (NOT the LLM) as context approaches the
|
||||
# compaction threshold. Thresholds are relative to where
|
||||
# compaction fires, not the raw context window.
|
||||
# compaction threshold. Fires at 85% of context_length so
|
||||
# it aligns with the poka-yoke WARNING signal (#538).
|
||||
# Does not inject into messages — just prints to CLI output
|
||||
# and fires status_callback for gateway platforms.
|
||||
if _compressor.threshold_tokens > 0:
|
||||
_compaction_progress = _real_tokens / _compressor.threshold_tokens
|
||||
if _compaction_progress >= 0.85 and not self._context_pressure_warned:
|
||||
if _compressor.context_length > 0:
|
||||
_context_fill = _real_tokens / _compressor.context_length
|
||||
if _context_fill >= 0.85 and not self._context_pressure_warned:
|
||||
self._context_pressure_warned = True
|
||||
_compaction_progress = _real_tokens / _compressor.threshold_tokens if _compressor.threshold_tokens > 0 else 1.0
|
||||
self._emit_context_pressure(_compaction_progress, _compressor)
|
||||
|
||||
if self.compression_enabled and _compressor.should_compress(_real_tokens):
|
||||
|
||||
Reference in New Issue
Block a user