From 204a6ed4e51f0999093f943b438fc1fdff93a5b1 Mon Sep 17 00:00:00 2001
From: hermes
Date: Sun, 15 Mar 2026 11:23:45 -0400
Subject: [PATCH] refactor: decompose _maybe_distill() into focused helpers (#151) (#160)

---
 src/timmy/thinking.py | 232 ++++++++++++++++++++++++------------------
 1 file changed, 132 insertions(+), 100 deletions(-)

diff --git a/src/timmy/thinking.py b/src/timmy/thinking.py
index 0eb0e647..1c987999 100644
--- a/src/timmy/thinking.py
+++ b/src/timmy/thinking.py
@@ -34,6 +34,37 @@ logger = logging.getLogger(__name__)
 
 _DEFAULT_DB = Path("data/thoughts.db")
 
+# Sensitive patterns that must never be stored as facts
+_SENSITIVE_PATTERNS = [
+    "token",
+    "password",
+    "secret",
+    "api_key",
+    "apikey",
+    "credential",
+    ".config/",
+    "/token",
+    "access_token",
+    "private_key",
+    "ssh_key",
+]
+
+# Meta-observation phrases to filter out from distilled facts
+_META_OBSERVATION_PHRASES = [
+    "my own",
+    "my thinking",
+    "my memory",
+    "my working ram",
+    "self-declarative",
+    "meta-observation",
+    "internal state",
+    "my pending",
+    "my standing rules",
+    "thoughts generated",
+    "no chat messages",
+    "no user interaction",
+]
+
 # Seed types for thought generation
 SEED_TYPES = (
     "existential",
@@ -355,114 +386,115 @@ class ThinkingEngine:
 
     # ── Private helpers ──────────────────────────────────────────────────
 
-    async def _maybe_distill(self) -> None:
-        """Every N thoughts, extract lasting insights and store as facts.
+    def _should_distill(self) -> bool:
+        """Check if distillation should run based on interval and thought count."""
+        interval = settings.thinking_distill_every
+        if interval <= 0:
+            return False
 
-        Reads the last N thoughts, asks the LLM to extract any durable facts
-        or insights, and stores them via memory_write. Only runs when the
-        thought count is divisible by the configured interval.
+        count = self.count_thoughts()
+        if count == 0 or count % interval != 0:
+            return False
+
+        return True
+
+    def _build_distill_prompt(self, thoughts: list[Thought]) -> str:
+        """Build the prompt for extracting facts from recent thoughts.
+
+        Args:
+            thoughts: List of recent thoughts to analyze.
+
+        Returns:
+            The formatted prompt string for the LLM.
         """
+        thought_text = "\n".join(f"- [{t.seed_type}] {t.content}" for t in reversed(thoughts))
+
+        return (
+            "You are reviewing your own recent thoughts. Extract 0-3 facts "
+            "worth remembering long-term.\n\n"
+            "GOOD facts (store these):\n"
+            "- User preferences: 'Alexander prefers YAML config over code changes'\n"
+            "- Project decisions: 'Switched from hardcoded personas to agents.yaml'\n"
+            "- Learned knowledge: 'Ollama supports concurrent model loading'\n"
+            "- User information: 'Alexander is interested in Bitcoin and sovereignty'\n\n"
+            "BAD facts (never store these):\n"
+            "- Self-referential observations about your own thinking process\n"
+            "- Meta-commentary about your memory, timestamps, or internal state\n"
+            "- Observations about being idle or having no chat messages\n"
+            "- File paths, tokens, API keys, or any credentials\n"
+            "- Restatements of your standing rules or system prompt\n\n"
+            "Return ONLY a JSON array of strings. If nothing is worth saving, "
+            "return []. Be selective — only store facts about the EXTERNAL WORLD "
+            "(the user, the project, technical knowledge), never about your own "
+            "internal process.\n\n"
+            f"Recent thoughts:\n{thought_text}\n\nJSON array:"
+        )
+
+    def _parse_facts_response(self, raw: str) -> list[str]:
+        """Parse JSON array from LLM response, stripping markdown fences.
+
+        Args:
+            raw: Raw response string from the LLM.
+
+        Returns:
+            List of fact strings parsed from the response.
+        """
+        if not raw or not raw.strip():
+            return []
+
+        import json
+
+        cleaned = raw.strip()
+        if cleaned.startswith("```"):
+            cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
+
+        facts = json.loads(cleaned)
+        if not isinstance(facts, list):
+            return []
+
+        return [f for f in facts if isinstance(f, str)]
+
+    def _filter_and_store_facts(self, facts: list[str]) -> None:
+        """Filter and store valid facts, blocking sensitive and meta content.
+
+        Args:
+            facts: List of fact strings to filter and store.
+        """
+        from timmy.semantic_memory import memory_write
+
+        for fact in facts[:3]:  # Safety cap
+            if not isinstance(fact, str) or len(fact.strip()) <= 10:
+                continue
+
+            fact_lower = fact.lower()
+
+            # Block sensitive information
+            if any(pat in fact_lower for pat in _SENSITIVE_PATTERNS):
+                logger.warning("Distill: blocked sensitive fact: %s", fact[:60])
+                continue
+
+            # Block self-referential meta-observations
+            if any(phrase in fact_lower for phrase in _META_OBSERVATION_PHRASES):
+                logger.debug("Distill: skipped meta-observation: %s", fact[:60])
+                continue
+
+            result = memory_write(fact.strip(), context_type="fact")
+            logger.info("Distilled fact: %s → %s", fact[:60], result[:40])
+
+    async def _maybe_distill(self) -> None:
+        """Every N thoughts, extract lasting insights and store as facts."""
         try:
+            if not self._should_distill():
+                return
+
             interval = settings.thinking_distill_every
-            if interval <= 0:
-                return
-
-            count = self.count_thoughts()
-            if count == 0 or count % interval != 0:
-                return
-
             recent = self.get_recent_thoughts(limit=interval)
             if len(recent) < interval:
                 return
 
-            # Build a summary of recent thoughts for the LLM
-            thought_text = "\n".join(f"- [{t.seed_type}] {t.content}" for t in reversed(recent))
-
-            distill_prompt = (
-                "You are reviewing your own recent thoughts. Extract 0-3 facts "
-                "worth remembering long-term.\n\n"
-                "GOOD facts (store these):\n"
-                "- User preferences: 'Alexander prefers YAML config over code changes'\n"
-                "- Project decisions: 'Switched from hardcoded personas to agents.yaml'\n"
-                "- Learned knowledge: 'Ollama supports concurrent model loading'\n"
-                "- User information: 'Alexander is interested in Bitcoin and sovereignty'\n\n"
-                "BAD facts (never store these):\n"
-                "- Self-referential observations about your own thinking process\n"
-                "- Meta-commentary about your memory, timestamps, or internal state\n"
-                "- Observations about being idle or having no chat messages\n"
-                "- File paths, tokens, API keys, or any credentials\n"
-                "- Restatements of your standing rules or system prompt\n\n"
-                "Return ONLY a JSON array of strings. If nothing is worth saving, "
-                "return []. Be selective — only store facts about the EXTERNAL WORLD "
-                "(the user, the project, technical knowledge), never about your own "
-                "internal process.\n\n"
-                f"Recent thoughts:\n{thought_text}\n\nJSON array:"
-            )
-
-            raw = await self._call_agent(distill_prompt)
-            if not raw or not raw.strip():
-                return
-
-            # Parse JSON array from response
-            import json
-
-            # Strip markdown code fences if present
-            cleaned = raw.strip()
-            if cleaned.startswith("```"):
-                cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
-
-            facts = json.loads(cleaned)
-            if not isinstance(facts, list) or not facts:
-                return
-
-            from timmy.semantic_memory import memory_write
-
-            # Sensitive patterns that must never be stored as facts
-            _SENSITIVE_PATTERNS = [
-                "token",
-                "password",
-                "secret",
-                "api_key",
-                "apikey",
-                "credential",
-                ".config/",
-                "/token",
-                "access_token",
-                "private_key",
-                "ssh_key",
-            ]
-
-            for fact in facts[:3]:  # Safety cap
-                if not isinstance(fact, str) or len(fact.strip()) <= 10:
-                    continue
-                fact_lower = fact.lower()
-                # Block sensitive information
-                if any(pat in fact_lower for pat in _SENSITIVE_PATTERNS):
-                    logger.warning("Distill: blocked sensitive fact: %s", fact[:60])
-                    continue
-                # Block self-referential meta-observations
-                if any(
-                    phrase in fact_lower
-                    for phrase in [
-                        "my own",
-                        "my thinking",
-                        "my memory",
-                        "my working ram",
-                        "self-declarative",
-                        "meta-observation",
-                        "internal state",
-                        "my pending",
-                        "my standing rules",
-                        "thoughts generated",
-                        "no chat messages",
-                        "no user interaction",
-                    ]
-                ):
-                    logger.debug("Distill: skipped meta-observation: %s", fact[:60])
-                    continue
-                result = memory_write(fact.strip(), context_type="fact")
-                logger.info("Distilled fact: %s → %s", fact[:60], result[:40])
-
+            raw = await self._call_agent(self._build_distill_prompt(recent))
+            if facts := self._parse_facts_response(raw):
+                self._filter_and_store_facts(facts)
         except Exception as exc:
             logger.debug("Thought distillation skipped: %s", exc)
 