[SYNC] Merge upstream NousResearch/hermes-agent — 499 commits #201

Merged
Rockachopa merged 500 commits from upstream-sync into main 2026-04-07 14:03:16 +00:00
2 changed files with 62 additions and 4 deletions
Showing only changes of commit b26e85bf9d - Show all commits

View File

@@ -14,6 +14,7 @@ Improvements over v1:
"""
import logging
import time
from typing import Any, Dict, List, Optional
from agent.auxiliary_client import call_llm
@@ -46,6 +47,7 @@ _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"
# Chars per token rough estimate
_CHARS_PER_TOKEN = 4
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
class ContextCompressor:
@@ -118,6 +120,7 @@ class ContextCompressor:
# Stores the previous compaction summary for iterative updates
self._previous_summary: Optional[str] = None
self._summary_failure_cooldown_until: float = 0.0
def update_from_response(self, usage: Dict[str, Any]):
"""Update tracked token usage from API response."""
@@ -258,6 +261,14 @@ class ContextCompressor:
the middle turns without a summary rather than inject a useless
placeholder.
"""
now = time.monotonic()
if now < self._summary_failure_cooldown_until:
logger.debug(
"Skipping context summary during cooldown (%.0fs remaining)",
self._summary_failure_cooldown_until - now,
)
return None
summary_budget = self._compute_summary_budget(turns_to_summarize)
content_to_summarize = self._serialize_for_summary(turns_to_summarize)
@@ -345,7 +356,6 @@ Write only the summary body. Do not include any preamble or prefix."""
call_kwargs = {
"task": "compression",
"messages": [{"role": "user", "content": prompt}],
"temperature": 0.3,
"max_tokens": summary_budget * 2,
# timeout resolved from auxiliary.compression.timeout config by call_llm
}
@@ -359,13 +369,23 @@ Write only the summary body. Do not include any preamble or prefix."""
summary = content.strip()
# Store for iterative updates on next compaction
self._previous_summary = summary
self._summary_failure_cooldown_until = 0.0
return self._with_summary_prefix(summary)
except RuntimeError:
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
logging.warning("Context compression: no provider available for "
"summary. Middle turns will be dropped without summary.")
"summary. Middle turns will be dropped without summary "
"for %d seconds.",
_SUMMARY_FAILURE_COOLDOWN_SECONDS)
return None
except Exception as e:
logging.warning("Failed to generate context summary: %s", e)
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
logging.warning(
"Failed to generate context summary: %s. "
"Further summary attempts paused for %d seconds.",
e,
_SUMMARY_FAILURE_COOLDOWN_SECONDS,
)
return None
@staticmethod
@@ -648,7 +668,7 @@ Write only the summary body. Do not include any preamble or prefix."""
compressed.append({"role": summary_role, "content": summary})
else:
if not self.quiet_mode:
logger.warning("No summary model available — middle turns dropped without summary")
logger.debug("No summary model available — middle turns dropped without summary")
for i in range(compress_end, n_messages):
msg = messages[i].copy()

View File

@@ -197,6 +197,44 @@ class TestNonStringContent:
assert summary is not None
assert summary == SUMMARY_PREFIX
def test_summary_call_does_not_force_temperature(self):
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = "ok"
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
c = ContextCompressor(model="test", quiet_mode=True)
messages = [
{"role": "user", "content": "do something"},
{"role": "assistant", "content": "ok"},
]
with patch("agent.context_compressor.call_llm", return_value=mock_response) as mock_call:
c._generate_summary(messages)
kwargs = mock_call.call_args.kwargs
assert "temperature" not in kwargs
class TestSummaryFailureCooldown:
def test_summary_failure_enters_cooldown_and_skips_retry(self):
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
c = ContextCompressor(model="test", quiet_mode=True)
messages = [
{"role": "user", "content": "do something"},
{"role": "assistant", "content": "ok"},
]
with patch("agent.context_compressor.call_llm", side_effect=Exception("boom")) as mock_call:
first = c._generate_summary(messages)
second = c._generate_summary(messages)
assert first is None
assert second is None
assert mock_call.call_count == 1
class TestSummaryPrefixNormalization:
def test_legacy_prefix_is_replaced(self):