diff --git a/src/timmy/semantic_memory.py b/src/timmy/semantic_memory.py index 0ef70ef..853cfe5 100644 --- a/src/timmy/semantic_memory.py +++ b/src/timmy/semantic_memory.py @@ -436,10 +436,11 @@ def memory_write(content: str, context_type: str = "fact") -> str: try: from timmy.memory.vector_store import search_memories, store_memory - # Dedup check for facts — skip if a very similar fact already exists + # Dedup check for facts — skip if a similar fact already exists + # Threshold 0.75 catches paraphrases (was 0.9 which only caught near-exact) if context_type == "fact": existing = search_memories( - content.strip(), limit=3, context_type="fact", min_relevance=0.9 + content.strip(), limit=3, context_type="fact", min_relevance=0.75 ) if existing: return f"Similar fact already stored (id={existing[0].id[:8]}). Skipping duplicate." diff --git a/src/timmy/thinking.py b/src/timmy/thinking.py index 3d123ef..034bfbc 100644 --- a/src/timmy/thinking.py +++ b/src/timmy/thinking.py @@ -348,10 +348,23 @@ class ThinkingEngine: thought_text = "\n".join(f"- [{t.seed_type}] {t.content}" for t in reversed(recent)) distill_prompt = ( - "You are reviewing your own recent thoughts. Extract 0-3 lasting " - "insights, preferences, or facts worth remembering long-term. " + "You are reviewing your own recent thoughts. Extract 0-3 facts " + "worth remembering long-term.\n\n" + "GOOD facts (store these):\n" + "- User preferences: 'Alexander prefers YAML config over code changes'\n" + "- Project decisions: 'Switched from hardcoded personas to agents.yaml'\n" + "- Learned knowledge: 'Ollama supports concurrent model loading'\n" + "- User information: 'Alexander is interested in Bitcoin and sovereignty'\n\n" + "BAD facts (never store these):\n" + "- Self-referential observations about your own thinking process\n" + "- Meta-commentary about your memory, timestamps, or internal state\n" + "- Observations about being idle or having no chat messages\n" + "- File paths, tokens, API keys, or any credentials\n" + "- Restatements of your standing rules or system prompt\n\n" "Return ONLY a JSON array of strings. If nothing is worth saving, " - "return []. Do not include ephemeral observations or repeated themes.\n\n" + "return []. Be selective — only store facts about the EXTERNAL WORLD " + "(the user, the project, technical knowledge), never about your own " + "internal process.\n\n" f"Recent thoughts:\n{thought_text}\n\nJSON array:" ) @@ -373,10 +386,51 @@ class ThinkingEngine: from timmy.semantic_memory import memory_write + # Sensitive patterns that must never be stored as facts + _SENSITIVE_PATTERNS = [ + "token", + "password", + "secret", + "api_key", + "apikey", + "credential", + ".config/", + "/token", + "access_token", + "private_key", + "ssh_key", + ] + for fact in facts[:3]: # Safety cap - if isinstance(fact, str) and len(fact.strip()) > 10: - result = memory_write(fact.strip(), context_type="fact") - logger.info("Distilled fact: %s → %s", fact[:60], result[:40]) + if not isinstance(fact, str) or len(fact.strip()) <= 10: + continue + fact_lower = fact.lower() + # Block sensitive information + if any(pat in fact_lower for pat in _SENSITIVE_PATTERNS): + logger.warning("Distill: blocked sensitive fact: %s", fact[:60]) + continue + # Block self-referential meta-observations + if any( + phrase in fact_lower + for phrase in [ + "my own", + "my thinking", + "my memory", + "my working ram", + "self-declarative", + "meta-observation", + "internal state", + "my pending", + "my standing rules", + "thoughts generated", + "no chat messages", + "no user interaction", + ] + ): + logger.debug("Distill: skipped meta-observation: %s", fact[:60]) + continue + result = memory_write(fact.strip(), context_type="fact") + logger.info("Distilled fact: %s → %s", fact[:60], result[:40]) except Exception as exc: logger.debug("Thought distillation skipped: %s", exc)