Compare commits

...

3 Commits

Author SHA1 Message Date
Alexander Whitestone
2c8848b2f1 fix: context pressure warning fires at 85% of context_length, not threshold
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 51s
The _emit_context_pressure warning was firing at 85% of threshold_tokens
(the configurable compaction threshold), not 85% of context_length.
This caused it to fire at 42.5% of actual context when threshold_percent=0.50,
confusing users who expected alignment with the poka-yoke WARNING signal.

Fix: compare against context_length (0.85 * context_length) instead of
threshold_tokens. The compaction_progress passed to _emit_context_pressure
still uses threshold_tokens for the display bar, but the trigger condition
now matches the poka-yoke signal.

Closes #538
2026-04-13 22:24:40 -04:00
954fd992eb Merge pull request 'perf: lazy session creation — defer DB write until first message (#314)' (#449) from whip/314-1776127532 into main
Some checks failed
Forge CI / smoke-and-build (push) Failing after 55s
Forge CI / smoke-and-build (pull_request) Failing after 1m12s
perf: lazy session creation (#314)

Closes #314.
2026-04-14 01:08:13 +00:00
Metatron
f35f56e397 perf: lazy session creation — defer DB write until first message (closes #314)
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 56s
Remove the eager create_session() call from AIAgent.__init__(). Sessions
are now created lazily on the first _flush_messages_to_session_db() call
via ensure_session(), which uses INSERT OR IGNORE.

Impact: eliminates 32.4% of sessions (3,564 of 10,985) that were
created at agent init but never received any messages.

The existing ensure_session() fallback in _flush_messages_to_session_db()
already handles this pattern — it was originally designed for recovery
after transient SQLite lock failures. Now it's the primary creation path.

Compression-initiated sessions still use create_session() directly
(line ~5995) since they have messages to write immediately.
2026-04-13 20:52:06 -04:00

View File

@@ -1001,30 +1001,10 @@ class AIAgent:
self._session_db = session_db
self._parent_session_id = parent_session_id
self._last_flushed_db_idx = 0 # tracks DB-write cursor to prevent duplicate writes
if self._session_db:
try:
self._session_db.create_session(
session_id=self.session_id,
source=self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
model=self.model,
model_config={
"max_iterations": self.max_iterations,
"reasoning_config": reasoning_config,
"max_tokens": max_tokens,
},
user_id=None,
parent_session_id=self._parent_session_id,
)
except Exception as e:
# Transient SQLite lock contention (e.g. CLI and gateway writing
# concurrently) must NOT permanently disable session_search for
# this agent. Keep _session_db alive — subsequent message
# flushes and session_search calls will still work once the
# lock clears. The session row may be missing from the index
# for this run, but that is recoverable (flushes upsert rows).
logger.warning(
"Session DB create_session failed (session_search still available): %s", e
)
# Lazy session creation: defer until first message flush (#314).
# _flush_messages_to_session_db() calls ensure_session(), which uses
# INSERT OR IGNORE — creating the row only when messages arrive.
# This avoids writing the ~32% of sessions that were previously created
# at agent init but never received any messages.
# In-memory todo list for task planning (one per agent/session)
from tools.todo_tool import TodoStore
@@ -6025,9 +6005,9 @@ class AIAgent:
# can't reduce enough (e.g. threshold is very low, or system prompt
# alone exceeds the warning level), keep the flag set to prevent
# spamming the user with repeated warnings every loop iteration.
if self.context_compressor.threshold_tokens > 0:
_post_progress = _compressed_est / self.context_compressor.threshold_tokens
if _post_progress < 0.85:
if self.context_compressor.context_length > 0:
_context_fill = _compressed_est / self.context_compressor.context_length
if _context_fill < 0.85:
self._context_pressure_warned = False
# Clear the file-read dedup cache. After compression the original
@@ -9011,14 +8991,15 @@ class AIAgent:
# ── Context pressure warnings (user-facing only) ──────────
# Notify the user (NOT the LLM) as context approaches the
# compaction threshold. Thresholds are relative to where
# compaction fires, not the raw context window.
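# compaction threshold. The trigger fires at 85% of context_length
# (not 85% of threshold_tokens) so it aligns with the poka-yoke
# WARNING signal (#538); the progress value passed to the display
# is still computed relative to threshold_tokens.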
# Does not inject into messages — just prints to CLI output
# and fires status_callback for gateway platforms.
if _compressor.threshold_tokens > 0:
_compaction_progress = _real_tokens / _compressor.threshold_tokens
if _compaction_progress >= 0.85 and not self._context_pressure_warned:
if _compressor.context_length > 0:
_context_fill = _real_tokens / _compressor.context_length
if _context_fill >= 0.85 and not self._context_pressure_warned:
self._context_pressure_warned = True
_compaction_progress = _real_tokens / _compressor.threshold_tokens if _compressor.threshold_tokens > 0 else 1.0
self._emit_context_pressure(_compaction_progress, _compressor)
if self.compression_enabled and _compressor.should_compress(_real_tokens):