Merge pull request 'fix: fact distillation — block garbage and secrets, improve dedup' (#43) from fix/fact-distillation into main
This commit was merged in pull request #43.
This commit is contained in:
@@ -436,10 +436,11 @@ def memory_write(content: str, context_type: str = "fact") -> str:
|
||||
try:
|
||||
from timmy.memory.vector_store import search_memories, store_memory
|
||||
|
||||
# Dedup check for facts — skip if a very similar fact already exists
|
||||
# Dedup check for facts — skip if a similar fact already exists
|
||||
# Threshold 0.75 catches paraphrases (was 0.9 which only caught near-exact)
|
||||
if context_type == "fact":
|
||||
existing = search_memories(
|
||||
content.strip(), limit=3, context_type="fact", min_relevance=0.9
|
||||
content.strip(), limit=3, context_type="fact", min_relevance=0.75
|
||||
)
|
||||
if existing:
|
||||
return f"Similar fact already stored (id={existing[0].id[:8]}). Skipping duplicate."
|
||||
|
||||
@@ -348,10 +348,23 @@ class ThinkingEngine:
|
||||
thought_text = "\n".join(f"- [{t.seed_type}] {t.content}" for t in reversed(recent))
|
||||
|
||||
distill_prompt = (
|
||||
"You are reviewing your own recent thoughts. Extract 0-3 lasting "
|
||||
"insights, preferences, or facts worth remembering long-term. "
|
||||
"You are reviewing your own recent thoughts. Extract 0-3 facts "
|
||||
"worth remembering long-term.\n\n"
|
||||
"GOOD facts (store these):\n"
|
||||
"- User preferences: 'Alexander prefers YAML config over code changes'\n"
|
||||
"- Project decisions: 'Switched from hardcoded personas to agents.yaml'\n"
|
||||
"- Learned knowledge: 'Ollama supports concurrent model loading'\n"
|
||||
"- User information: 'Alexander is interested in Bitcoin and sovereignty'\n\n"
|
||||
"BAD facts (never store these):\n"
|
||||
"- Self-referential observations about your own thinking process\n"
|
||||
"- Meta-commentary about your memory, timestamps, or internal state\n"
|
||||
"- Observations about being idle or having no chat messages\n"
|
||||
"- File paths, tokens, API keys, or any credentials\n"
|
||||
"- Restatements of your standing rules or system prompt\n\n"
|
||||
"Return ONLY a JSON array of strings. If nothing is worth saving, "
|
||||
"return []. Do not include ephemeral observations or repeated themes.\n\n"
|
||||
"return []. Be selective — only store facts about the EXTERNAL WORLD "
|
||||
"(the user, the project, technical knowledge), never about your own "
|
||||
"internal process.\n\n"
|
||||
f"Recent thoughts:\n{thought_text}\n\nJSON array:"
|
||||
)
|
||||
|
||||
@@ -373,10 +386,51 @@ class ThinkingEngine:
|
||||
|
||||
from timmy.semantic_memory import memory_write
|
||||
|
||||
# Sensitive patterns that must never be stored as facts
|
||||
_SENSITIVE_PATTERNS = [
|
||||
"token",
|
||||
"password",
|
||||
"secret",
|
||||
"api_key",
|
||||
"apikey",
|
||||
"credential",
|
||||
".config/",
|
||||
"/token",
|
||||
"access_token",
|
||||
"private_key",
|
||||
"ssh_key",
|
||||
]
|
||||
|
||||
for fact in facts[:3]: # Safety cap
|
||||
if isinstance(fact, str) and len(fact.strip()) > 10:
|
||||
result = memory_write(fact.strip(), context_type="fact")
|
||||
logger.info("Distilled fact: %s → %s", fact[:60], result[:40])
|
||||
if not isinstance(fact, str) or len(fact.strip()) <= 10:
|
||||
continue
|
||||
fact_lower = fact.lower()
|
||||
# Block sensitive information
|
||||
if any(pat in fact_lower for pat in _SENSITIVE_PATTERNS):
|
||||
logger.warning("Distill: blocked sensitive fact: %s", fact[:60])
|
||||
continue
|
||||
# Block self-referential meta-observations
|
||||
if any(
|
||||
phrase in fact_lower
|
||||
for phrase in [
|
||||
"my own",
|
||||
"my thinking",
|
||||
"my memory",
|
||||
"my working ram",
|
||||
"self-declarative",
|
||||
"meta-observation",
|
||||
"internal state",
|
||||
"my pending",
|
||||
"my standing rules",
|
||||
"thoughts generated",
|
||||
"no chat messages",
|
||||
"no user interaction",
|
||||
]
|
||||
):
|
||||
logger.debug("Distill: skipped meta-observation: %s", fact[:60])
|
||||
continue
|
||||
result = memory_write(fact.strip(), context_type="fact")
|
||||
logger.info("Distilled fact: %s → %s", fact[:60], result[:40])
|
||||
|
||||
except Exception as exc:
|
||||
logger.debug("Thought distillation skipped: %s", exc)
|
||||
|
||||
Reference in New Issue
Block a user