fix: context pressure warning fires at 85% of context_length, not threshold

The _emit_context_pressure warning was firing at 85% of threshold_tokens (the configurable compaction threshold) rather than at 85% of context_length. With threshold_percent=0.50 it therefore fired at 42.5% of the actual context window, confusing users who expected it to align with the poka-yoke WARNING signal. Fix: the trigger condition now compares against context_length (it fires at 0.85 * context_length) instead of threshold_tokens. The compaction_progress value passed to _emit_context_pressure is still computed from threshold_tokens for the display bar; only the trigger condition changes, so that it matches the poka-yoke signal. Closes #538.
Merge pull request 'perf: lazy session creation — defer DB write until first message (#314)' (#449) from whip/314-1776127532 into main
2026-04-13 22:24:40 -04:00 · 2026-04-14 01:08:13 +00:00 · 2026-04-13 20:52:06 -04:00
1 changed files with 13 additions and 32 deletions
--- a/run_agent.py
+++ b/run_agent.py
@@ -1001,30 +1001,10 @@ class AIAgent:
        self._session_db = session_db
        self._parent_session_id = parent_session_id
        self._last_flushed_db_idx = 0  # tracks DB-write cursor to prevent duplicate writes
-        if self._session_db:
-            try:
-                self._session_db.create_session(
-                    session_id=self.session_id,
-                    source=self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
-                    model=self.model,
-                    model_config={
-                        "max_iterations": self.max_iterations,
-                        "reasoning_config": reasoning_config,
-                        "max_tokens": max_tokens,
-                    },
-                    user_id=None,
-                    parent_session_id=self._parent_session_id,
-                )
-            except Exception as e:
-                # Transient SQLite lock contention (e.g. CLI and gateway writing
-                # concurrently) must NOT permanently disable session_search for
-                # this agent.  Keep _session_db alive — subsequent message
-                # flushes and session_search calls will still work once the
-                # lock clears.  The session row may be missing from the index
-                # for this run, but that is recoverable (flushes upsert rows).
-                logger.warning(
-                    "Session DB create_session failed (session_search still available): %s", e
-                )
+        # Lazy session creation: defer until first message flush (#314).
+        # _flush_messages_to_session_db() calls ensure_session() which uses
+        # INSERT OR IGNORE — creating the row only when messages arrive.
+        # This eliminates 32% of sessions that are created but never used.
        
        # In-memory todo list for task planning (one per agent/session)
        from tools.todo_tool import TodoStore
@@ -6025,9 +6005,9 @@ class AIAgent:
        # can't reduce enough (e.g. threshold is very low, or system prompt
        # alone exceeds the warning level), keep the flag set to prevent
        # spamming the user with repeated warnings every loop iteration.
-        if self.context_compressor.threshold_tokens > 0:
-            _post_progress = _compressed_est / self.context_compressor.threshold_tokens
-            if _post_progress < 0.85:
+        if self.context_compressor.context_length > 0:
+            _context_fill = _compressed_est / self.context_compressor.context_length
+            if _context_fill < 0.85:
                self._context_pressure_warned = False

        # Clear the file-read dedup cache.  After compression the original
@@ -9011,14 +8991,15 @@ class AIAgent:

                    # ── Context pressure warnings (user-facing only) ──────────
                    # Notify the user (NOT the LLM) as context approaches the
-                    # compaction threshold.  Thresholds are relative to where
-                    # compaction fires, not the raw context window.
+                    # compaction threshold.  Fires at 85% of context_length so
+                    # it aligns with the poka-yoke WARNING signal (#538).
                    # Does not inject into messages — just prints to CLI output
                    # and fires status_callback for gateway platforms.
-                    if _compressor.threshold_tokens > 0:
-                        _compaction_progress = _real_tokens / _compressor.threshold_tokens
-                        if _compaction_progress >= 0.85 and not self._context_pressure_warned:
+                    if _compressor.context_length > 0:
+                        _context_fill = _real_tokens / _compressor.context_length
+                        if _context_fill >= 0.85 and not self._context_pressure_warned:
                            self._context_pressure_warned = True
+                            _compaction_progress = _real_tokens / _compressor.threshold_tokens if _compressor.threshold_tokens > 0 else 1.0
                            self._emit_context_pressure(_compaction_progress, _compressor)

                    if self.compression_enabled and _compressor.should_compress(_real_tokens):