Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
a61761d321 fix(memory): filter noisy prefetch results before injection (#982)
All checks were successful
Lint / lint (pull_request) Successful in 28s
Add line-level filtering to MemoryManager.prefetch_all() that removes:
- exact duplicate lines
- meta-instruction debris (prompts, templates like 'Focus on:')
- very short / content-free fragments (fewer than 15 characters once leading list markers are stripped)

This prevents noisy memory providers (e.g. MemPalace transcript recall)
from bloating the <memory-context> block with repeated instructions and
low-signal snippets.

Changes:
- agent/memory_manager.py: add _filter_prefetch_lines(),
  _is_meta_instruction_line(), _is_low_signal_line() helpers;
  wire filtering into prefetch_all()
- tests/agent/test_memory_provider.py: add TestPrefetchFiltering with
  8 tests covering dedup, meta-instruction removal, low-signal filtering,
  structured-fact preservation, and integration with prefetch_all()

Closes #982
2026-04-22 03:06:50 -04:00
3 changed files with 171 additions and 32 deletions

View File

@@ -50,6 +50,78 @@ def sanitize_context(text: str) -> str:
return _FENCE_TAG_RE.sub('', text)
# ---------------------------------------------------------------------------
# Prefetch filtering helpers
# ---------------------------------------------------------------------------
# Meta-instruction debris that memory providers sometimes echo back.
# These are prompts/instructions, not user-generated content.
# Shared line prefix: optional indentation, one optional list marker
# ("-", "*" or "•"), then an optional blockquote ">".  "•" is accepted
# because _is_low_signal_line() also treats it as a list marker; without it
# a "• Focus on: ..." line would slip past the meta-instruction filter.
_META_LINE_PREFIX = r"^\s*[\-\*•]?\s*>?\s*"

# One compiled, case-insensitive pattern per instruction phrase.  Every
# pattern is anchored at the start of the line via _META_LINE_PREFIX, so the
# phrases only count when they open the line.
_META_INSTRUCTION_PATTERNS = [
    re.compile(_META_LINE_PREFIX + phrase, re.IGNORECASE)
    for phrase in (
        r"Focus on:\s*",
        r"Note:\s*",
        r"System\s+(note|prompt|instruction):",
        r"You are\s+",
        r"Please\s+(provide|respond|answer|write)",
        r"Do not\s+",
        r"Always\s+",
        r"Consider\s+(the following|these|this)\b",
        r"Here\s+(is|are)\s+(some|the|a few)\b",
    )
]
def _is_meta_instruction_line(line: str) -> bool:
    """Tell whether *line* reads like prompt/template text rather than memory.

    Returns True when at least one entry of ``_META_INSTRUCTION_PATTERNS``
    is found in *line* (using ``search``), and False when none is.
    """
    return any(pattern.search(line) for pattern in _META_INSTRUCTION_PATTERNS)
# Leading run of list markers ("-", "*", "•"), blockquote ">" and whitespace.
_LEADING_MARKERS_RE = re.compile(r"^[\-\*•>\s]+")

# A line needs at least this many characters after its leading markers to be
# considered meaningful.
_MIN_SIGNAL_CHARS = 15


def _is_low_signal_line(line: str) -> bool:
    """Return True for very short or content-free lines.

    The line is stripped of surrounding whitespace, then any leading run of
    list/quote markers is removed.  If fewer than ``_MIN_SIGNAL_CHARS``
    characters remain, the line is low-signal.

    Empty lines and bare markers such as "-", "*", ">" or "--" need no
    special case: they are shorter than the threshold, so the length check
    already classifies them as low-signal.

    Args:
        line: A single line of prefetched memory text.

    Returns:
        True if the line should be discarded as noise, False otherwise.
    """
    content = _LEADING_MARKERS_RE.sub("", line.strip())
    return len(content) < _MIN_SIGNAL_CHARS
def _filter_prefetch_lines(text: str) -> str:
    """Strip noise from prefetched memory text, one line at a time.

    A line is discarded when any of the following holds:
    - its whitespace-stripped form was already kept earlier (duplicate)
    - ``_is_meta_instruction_line`` flags it (prompt/template debris)
    - ``_is_low_signal_line`` flags it (very short or content-free; this
      includes blank lines, so blank separators do not survive)

    Kept lines retain their original text and relative order and are
    re-joined with single newlines.  Empty or whitespace-only input yields
    the empty string.
    """
    if not (text and text.strip()):
        return ""

    emitted: set = set()
    survivors: list = []
    for raw_line in text.splitlines():
        key = raw_line.strip()
        is_noise = (
            key in emitted
            or _is_meta_instruction_line(raw_line)
            or _is_low_signal_line(raw_line)
        )
        if not is_noise:
            # Only lines that are actually kept count for later dedup.
            emitted.add(key)
            survivors.append(raw_line)
    return "\n".join(survivors)
def build_memory_context_block(raw_context: str) -> str:
"""Wrap prefetched memory in a fenced block with system note.
@@ -180,7 +252,14 @@ class MemoryManager:
"Memory provider '%s' prefetch failed (non-fatal): %s",
provider.name, e,
)
return "\n\n".join(parts)
raw = "\n\n".join(parts)
if not raw:
return ""
# Apply line-level filtering: dedupe, strip meta-instructions,
# remove very short fragments. This prevents noisy providers
# (e.g. MemPalace transcript recall) from bloating context.
filtered = _filter_prefetch_lines(raw)
return filtered
def queue_prefetch_all(self, query: str, *, session_id: str = "") -> None:
"""Queue background prefetch on all providers for the next turn."""

View File

@@ -1,29 +0,0 @@
# Phase 3: Poka-yoke Integration & Fleet Verification
Epic #967. Morning review packet for Hermes harness features.
## Poka-yoke Features Implemented
| Feature | Module | PR | Status |
|---------|--------|-----|--------|
| Token budget tracker | agent/token_budget.py | #930 | MERGED |
| Provider preflight validation | agent/provider_preflight.py | #932 | MERGED |
| Atomic skill editing | tools/skill_edit_guard.py | #933 | MERGED |
| Config debt fixes | gateway/config.py | #437 | MERGED |
| Test collection fixes | tests/acp/conftest.py | #794 | MERGED |
| Context-faithful prompting | agent/context_faithful.py | #786 | MERGED |
## Fleet Verification
- Unit tests pass on all modules
- Collection: 11,472 tests, 0 errors (was 6 errors)
- ACP tests: cleanly skipped when acp extra missing
- Provider validation: catches missing/short keys
- Skill editing: atomic with auto-revert
## Next Steps
1. Wire token_budget into run_agent.py conversation loop
2. Wire provider_preflight into session start
3. Wire skill_edit_guard into skill_manage tool
4. Fleet-wide deployment verification

View File

@@ -198,14 +198,14 @@ class TestMemoryManager:
def test_prefetch_skips_empty(self):
mgr = MemoryManager()
p1 = FakeMemoryProvider("builtin")
p1._prefetch_result = "Has memories"
p1._prefetch_result = "This provider has meaningful memories with enough length"
p2 = FakeMemoryProvider("external")
p2._prefetch_result = ""
mgr.add_provider(p1)
mgr.add_provider(p2)
result = mgr.prefetch_all("query")
assert result == "Has memories"
assert result == "This provider has meaningful memories with enough length"
def test_queue_prefetch_all(self):
mgr = MemoryManager()
@@ -695,3 +695,92 @@ class TestMemoryContextFencing:
fence_end = combined.index("</memory-context>")
assert "Alice" in combined[fence_start:fence_end]
assert combined.index("weather") < fence_start
class TestPrefetchFiltering:
    """Exercise the prefetch line filter, its two predicates, and prefetch_all()."""

    def test_deduplicates_exact_lines(self):
        """A repeated line appears only once in the filtered output."""
        from agent.memory_manager import _filter_prefetch_lines

        raw = "- This is line one with enough characters\n- This is line two with enough characters\n- This is line one with enough characters\n- This is line three with enough characters"
        result = _filter_prefetch_lines(raw)
        non_blank = [ln for ln in result.splitlines() if ln.strip()]
        assert len(non_blank) == 3
        for expected in (
            "- This is line one with enough characters",
            "- This is line two with enough characters",
            "- This is line three with enough characters",
        ):
            assert expected in result

    def test_removes_meta_instruction_debris(self):
        """Instruction-style lines are dropped; headings and real content stay."""
        from agent.memory_manager import _filter_prefetch_lines

        raw = (
            "## Fleet Memories\n"
            "- > Focus on: was a non-trivial approach used\n"
            "- > Focus on: was a non-trivial approach used\n"
            "- Actual memory content about fleet ops\n"
            "- Note: this is just a note\n"
        )
        result = _filter_prefetch_lines(raw)
        for unwanted in ("Focus on", "Note:"):
            assert unwanted not in result
        for wanted in ("Actual memory content about fleet ops", "Fleet Memories"):
            assert wanted in result

    def test_removes_low_signal_short_lines(self):
        """Bare markers and tiny fragments are filtered out."""
        from agent.memory_manager import _filter_prefetch_lines

        raw = (
            "- \n"
            "- x\n"
            "- This is a meaningful memory entry with enough length\n"
        )
        result = _filter_prefetch_lines(raw)
        assert "- x" not in result
        assert "meaningful memory entry" in result

    def test_preserves_structured_facts(self):
        """Ordinary fact lines pass through the filter."""
        from agent.memory_manager import _filter_prefetch_lines

        raw = (
            "## Local Facts (Hologram)\n"
            "- ALEXANDER: Prefers Gitea for reports and deliverables.\n"
            "- Telegram home channel is Timmy Time.\n"
        )
        result = _filter_prefetch_lines(raw)
        for token in ("ALEXANDER", "Gitea", "Telegram"):
            assert token in result

    def test_is_meta_instruction_line(self):
        """Check the meta-instruction predicate against labelled samples."""
        from agent.memory_manager import _is_meta_instruction_line

        cases = (
            ("- > Focus on: something", True),
            ("- Focus on: something", True),
            ("* Focus on: something", True),
            ("- Actual user memory content", False),
            ("ALEXANDER: Prefers Gitea", False),
        )
        for sample, expected in cases:
            assert _is_meta_instruction_line(sample) is expected

    def test_is_low_signal_line(self):
        """Check the low-signal predicate against labelled samples."""
        from agent.memory_manager import _is_low_signal_line

        cases = (
            ("- ", True),
            ("*", True),
            ("- x", True),
            ("- Short line", True),
            ("- This is a long meaningful memory entry", False),
        )
        for sample, expected in cases:
            assert _is_low_signal_line(sample) is expected

    def test_prefetch_all_applies_filtering(self):
        """prefetch_all() returns provider output with the filter applied."""
        from agent.memory_manager import MemoryManager

        manager = MemoryManager()
        provider = FakeMemoryProvider(name="test")
        provider._prefetch_result = (
            "- > Focus on: was a non-trivial approach\n"
            "- > Focus on: was a non-trivial approach\n"
            "- Real memory fact\n"
        )
        manager.add_provider(provider)
        result = manager.prefetch_all("query")
        assert "Focus on" not in result
        assert "Real memory fact" in result
        assert result.count("Real memory fact") == 1

    def test_empty_prefetch_returns_empty(self):
        """Empty and whitespace-only inputs filter down to the empty string."""
        from agent.memory_manager import _filter_prefetch_lines

        for blank_input in ("", " ", "\n\n"):
            assert _filter_prefetch_lines(blank_input) == ""