Merge branch 'main' into feat/triage-and-retro-loops
All checks were successful
Tests / lint (pull_request) Successful in 3s
Tests / test (pull_request) Successful in 1m5s

This commit is contained in:
2026-03-15 11:23:58 -04:00
5 changed files with 400 additions and 109 deletions

View File

@@ -37,6 +37,7 @@ class RunResult:
"""Minimal Agno-compatible run result — carries the model's response text."""
content: str
confidence: float | None = None
def is_apple_silicon() -> bool:

128
src/timmy/confidence.py Normal file
View File

@@ -0,0 +1,128 @@
"""Confidence estimation for Timmy's responses.
Implements SOUL.md requirement: "When I am uncertain, I must say so in
proportion to my uncertainty."
This module provides heuristics to estimate confidence based on linguistic
signals in the response text. It measures uncertainty without modifying
the response content.
"""
import re
# Hedging words that indicate uncertainty
HEDGING_WORDS = [
"i think",
"maybe",
"perhaps",
"not sure",
"might",
"could be",
"possibly",
"i believe",
"approximately",
"roughly",
"probably",
"likely",
"seems",
"appears",
"suggests",
"i guess",
"i suppose",
"sort of",
"kind of",
"somewhat",
"fairly",
"relatively",
"i'm not certain",
"i am not certain",
"uncertain",
"unclear",
]
# Certainty words that indicate confidence
CERTAINTY_WORDS = [
"i know",
"definitely",
"certainly",
"the answer is",
"specifically",
"exactly",
"absolutely",
"without doubt",
"i am certain",
"i'm certain",
"it is true that",
"fact is",
"in fact",
"indeed",
"undoubtedly",
"clearly",
"obviously",
"conclusively",
]
# Very low confidence indicators (direct admissions of ignorance)
LOW_CONFIDENCE_PATTERNS = [
r"i\s+(?:don't|do not)\s+know",
r"i\s+(?:am|I'm|i'm)\s+(?:not\s+sure|unsure)",
r"i\s+have\s+no\s+(?:idea|clue)",
r"i\s+cannot\s+(?:say|tell|answer)",
r"i\s+can't\s+(?:say|tell|answer)",
]
def estimate_confidence(text: str) -> float:
"""Estimate confidence level of a response based on linguistic signals.
Analyzes the text for hedging words (reducing confidence) and certainty
words (increasing confidence). Returns a score between 0.0 and 1.0.
Args:
text: The response text to analyze.
Returns:
A float between 0.0 (very uncertain) and 1.0 (very confident).
"""
if not text or not text.strip():
return 0.0
text_lower = text.lower().strip()
confidence = 0.5 # Start with neutral confidence
# Check for direct admissions of ignorance (very low confidence)
for pattern in LOW_CONFIDENCE_PATTERNS:
if re.search(pattern, text_lower):
# Direct admission of not knowing - very low confidence
confidence = 0.15
break
# Count hedging words (reduce confidence)
hedging_count = 0
for hedge in HEDGING_WORDS:
if hedge in text_lower:
hedging_count += 1
# Count certainty words (increase confidence)
certainty_count = 0
for certain in CERTAINTY_WORDS:
if certain in text_lower:
certainty_count += 1
# Adjust confidence based on word counts
# Each hedging word reduces confidence by 0.1
# Each certainty word increases confidence by 0.1
confidence -= hedging_count * 0.1
confidence += certainty_count * 0.1
# Short factual answers get a small boost
word_count = len(text.split())
if word_count <= 5 and confidence > 0.3:
confidence += 0.1
# Questions in response indicate uncertainty
if "?" in text:
confidence -= 0.15
# Clamp to valid range
return max(0.0, min(1.0, confidence))

View File

@@ -38,21 +38,23 @@ class SessionLogger:
# In-memory buffer
self._buffer: list[dict] = []
def record_message(self, role: str, content: str) -> None:
def record_message(self, role: str, content: str, confidence: float | None = None) -> None:
"""Record a user message.
Args:
role: "user" or "timmy"
content: The message content
confidence: Optional confidence score (0.0 to 1.0)
"""
self._buffer.append(
{
"type": "message",
"role": role,
"content": content,
"timestamp": datetime.now().isoformat(),
}
)
entry = {
"type": "message",
"role": role,
"content": content,
"timestamp": datetime.now().isoformat(),
}
if confidence is not None:
entry["confidence"] = confidence
self._buffer.append(entry)
def record_tool_call(self, tool_name: str, args: dict, result: str) -> None:
"""Record a tool call.

View File

@@ -34,6 +34,37 @@ logger = logging.getLogger(__name__)
_DEFAULT_DB = Path("data/thoughts.db")
# Sensitive patterns that must never be stored as facts.
# Substring-matched against the lowercased fact text before anything is
# written to semantic memory (see _filter_and_store_facts).
_SENSITIVE_PATTERNS = [
    "token",
    "password",
    "secret",
    "api_key",
    "apikey",
    "credential",
    ".config/",      # dotfile/config paths often contain credentials
    "/token",
    "access_token",
    "private_key",
    "ssh_key",
]
# Meta-observation phrases to filter out from distilled facts.
# These catch self-referential commentary about the agent's own internal
# state, which SOUL.md forbids persisting as long-term facts.
_META_OBSERVATION_PHRASES = [
    "my own",
    "my thinking",
    "my memory",
    "my working ram",
    "self-declarative",
    "meta-observation",
    "internal state",
    "my pending",
    "my standing rules",
    "thoughts generated",
    "no chat messages",
    "no user interaction",
]
# Seed types for thought generation
SEED_TYPES = (
"existential",
@@ -355,114 +386,115 @@ class ThinkingEngine:
# ── Private helpers ──────────────────────────────────────────────────
def _should_distill(self) -> bool:
    """Check if distillation should run based on interval and thought count.

    Returns:
        True only when a positive interval is configured and the current
        thought count is a non-zero multiple of that interval.
    """
    interval = settings.thinking_distill_every
    if interval <= 0:
        # Distillation disabled by configuration.
        return False
    count = self.count_thoughts()
    if count == 0 or count % interval != 0:
        return False
    return True
def _build_distill_prompt(self, thoughts: list[Thought]) -> str:
"""Build the prompt for extracting facts from recent thoughts.
Args:
thoughts: List of recent thoughts to analyze.
Returns:
The formatted prompt string for the LLM.
"""
thought_text = "\n".join(f"- [{t.seed_type}] {t.content}" for t in reversed(thoughts))
return (
"You are reviewing your own recent thoughts. Extract 0-3 facts "
"worth remembering long-term.\n\n"
"GOOD facts (store these):\n"
"- User preferences: 'Alexander prefers YAML config over code changes'\n"
"- Project decisions: 'Switched from hardcoded personas to agents.yaml'\n"
"- Learned knowledge: 'Ollama supports concurrent model loading'\n"
"- User information: 'Alexander is interested in Bitcoin and sovereignty'\n\n"
"BAD facts (never store these):\n"
"- Self-referential observations about your own thinking process\n"
"- Meta-commentary about your memory, timestamps, or internal state\n"
"- Observations about being idle or having no chat messages\n"
"- File paths, tokens, API keys, or any credentials\n"
"- Restatements of your standing rules or system prompt\n\n"
"Return ONLY a JSON array of strings. If nothing is worth saving, "
"return []. Be selective — only store facts about the EXTERNAL WORLD "
"(the user, the project, technical knowledge), never about your own "
"internal process.\n\n"
f"Recent thoughts:\n{thought_text}\n\nJSON array:"
)
def _parse_facts_response(self, raw: str) -> list[str]:
"""Parse JSON array from LLM response, stripping markdown fences.
Args:
raw: Raw response string from the LLM.
Returns:
List of fact strings parsed from the response.
"""
if not raw or not raw.strip():
return []
import json
cleaned = raw.strip()
if cleaned.startswith("```"):
cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
facts = json.loads(cleaned)
if not isinstance(facts, list):
return []
return [f for f in facts if isinstance(f, str)]
def _filter_and_store_facts(self, facts: list[str]) -> None:
    """Filter and store valid facts, blocking sensitive and meta content.

    At most three facts are stored per pass. A fact is skipped when it is
    not a string, is 10 characters or shorter after stripping, contains a
    sensitive substring, or reads as a self-referential meta-observation.

    Args:
        facts: List of fact strings to filter and store.
    """
    from timmy.semantic_memory import memory_write

    for fact in facts[:3]:  # Safety cap
        if not isinstance(fact, str) or len(fact.strip()) <= 10:
            continue
        fact_lower = fact.lower()
        # Block sensitive information (tokens, keys, credentials, paths).
        if any(pat in fact_lower for pat in _SENSITIVE_PATTERNS):
            logger.warning("Distill: blocked sensitive fact: %s", fact[:60])
            continue
        # Block self-referential meta-observations.
        if any(phrase in fact_lower for phrase in _META_OBSERVATION_PHRASES):
            logger.debug("Distill: skipped meta-observation: %s", fact[:60])
            continue
        result = memory_write(fact.strip(), context_type="fact")
        logger.info("Distilled fact: %s%s", fact[:60], result[:40])
async def _maybe_distill(self) -> None:
"""Every N thoughts, extract lasting insights and store as facts."""
try:
if not self._should_distill():
return
interval = settings.thinking_distill_every
if interval <= 0:
return
count = self.count_thoughts()
if count == 0 or count % interval != 0:
return
recent = self.get_recent_thoughts(limit=interval)
if len(recent) < interval:
return
# Build a summary of recent thoughts for the LLM
thought_text = "\n".join(f"- [{t.seed_type}] {t.content}" for t in reversed(recent))
distill_prompt = (
"You are reviewing your own recent thoughts. Extract 0-3 facts "
"worth remembering long-term.\n\n"
"GOOD facts (store these):\n"
"- User preferences: 'Alexander prefers YAML config over code changes'\n"
"- Project decisions: 'Switched from hardcoded personas to agents.yaml'\n"
"- Learned knowledge: 'Ollama supports concurrent model loading'\n"
"- User information: 'Alexander is interested in Bitcoin and sovereignty'\n\n"
"BAD facts (never store these):\n"
"- Self-referential observations about your own thinking process\n"
"- Meta-commentary about your memory, timestamps, or internal state\n"
"- Observations about being idle or having no chat messages\n"
"- File paths, tokens, API keys, or any credentials\n"
"- Restatements of your standing rules or system prompt\n\n"
"Return ONLY a JSON array of strings. If nothing is worth saving, "
"return []. Be selective — only store facts about the EXTERNAL WORLD "
"(the user, the project, technical knowledge), never about your own "
"internal process.\n\n"
f"Recent thoughts:\n{thought_text}\n\nJSON array:"
)
raw = await self._call_agent(distill_prompt)
if not raw or not raw.strip():
return
# Parse JSON array from response
import json
# Strip markdown code fences if present
cleaned = raw.strip()
if cleaned.startswith("```"):
cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
facts = json.loads(cleaned)
if not isinstance(facts, list) or not facts:
return
from timmy.semantic_memory import memory_write
# Sensitive patterns that must never be stored as facts
_SENSITIVE_PATTERNS = [
"token",
"password",
"secret",
"api_key",
"apikey",
"credential",
".config/",
"/token",
"access_token",
"private_key",
"ssh_key",
]
for fact in facts[:3]: # Safety cap
if not isinstance(fact, str) or len(fact.strip()) <= 10:
continue
fact_lower = fact.lower()
# Block sensitive information
if any(pat in fact_lower for pat in _SENSITIVE_PATTERNS):
logger.warning("Distill: blocked sensitive fact: %s", fact[:60])
continue
# Block self-referential meta-observations
if any(
phrase in fact_lower
for phrase in [
"my own",
"my thinking",
"my memory",
"my working ram",
"self-declarative",
"meta-observation",
"internal state",
"my pending",
"my standing rules",
"thoughts generated",
"no chat messages",
"no user interaction",
]
):
logger.debug("Distill: skipped meta-observation: %s", fact[:60])
continue
result = memory_write(fact.strip(), context_type="fact")
logger.info("Distilled fact: %s%s", fact[:60], result[:40])
raw = await self._call_agent(self._build_distill_prompt(recent))
if facts := self._parse_facts_response(raw):
self._filter_and_store_facts(facts)
except Exception as exc:
logger.debug("Thought distillation skipped: %s", exc)

View File

@@ -0,0 +1,128 @@
"""Tests for confidence estimation in src/timmy/confidence.py."""
from timmy.confidence import (
CERTAINTY_WORDS,
HEDGING_WORDS,
estimate_confidence,
)
class TestEstimateConfidence:
    """Test cases for estimate_confidence function."""

    def test_empty_string_returns_zero(self):
        """Empty string should return 0.0 confidence."""
        assert estimate_confidence("") == 0.0

    def test_whitespace_only_returns_zero(self):
        """Whitespace-only string should return 0.0 confidence."""
        assert estimate_confidence("   ") == 0.0

    def test_normal_factual_response(self):
        """Factual response should have at least moderate confidence."""
        result = estimate_confidence("Paris is the capital of France.")
        assert 0.5 <= result <= 1.0
        # 6 words doesn't get short-response boost, should be at base
        assert result >= 0.5

    def test_i_dont_know_gives_very_low_confidence(self):
        """Direct admission of not knowing should give very low confidence."""
        result = estimate_confidence("I don't know the answer to that.")
        assert result <= 0.2

    def test_i_am_not_sure_gives_very_low_confidence(self):
        """Uncertainty admission should give very low confidence."""
        result = estimate_confidence("I am not sure about this.")
        assert result <= 0.2

    def test_hedging_words_reduce_confidence(self):
        """Hedging words should reduce confidence below base."""
        base = estimate_confidence("This is the answer.")
        hedged = estimate_confidence("I think this is the answer.")
        assert hedged < base

    def test_maybe_reduces_confidence(self):
        """The word 'maybe' should reduce confidence."""
        base = estimate_confidence("It will rain tomorrow.")
        hedged = estimate_confidence("Maybe it will rain tomorrow.")
        assert hedged < base

    def test_perhaps_reduces_confidence(self):
        """The word 'perhaps' should reduce confidence."""
        base = estimate_confidence("The solution is correct.")
        hedged = estimate_confidence("Perhaps the solution is correct.")
        assert hedged < base

    def test_certainty_words_increase_confidence(self):
        """Certainty words should increase confidence above base."""
        # Use longer sentence to avoid short-response boost confounding
        base = estimate_confidence("This is a longer sentence with more words.")
        certain = estimate_confidence(
            "I definitely know this is a longer sentence with more words."
        )
        assert certain > base

    def test_definitely_increases_confidence(self):
        """The word 'definitely' should increase confidence."""
        base = estimate_confidence("This will work.")
        certain = estimate_confidence("This will definitely work.")
        assert certain > base

    def test_question_reduces_confidence(self):
        """Questions in response should reduce confidence."""
        base = estimate_confidence("The value is 10.")
        questioning = estimate_confidence("The value is 10?")
        assert questioning < base

    def test_multiple_hedging_words_compound(self):
        """Multiple hedging words should compound to lower confidence."""
        text = "I think maybe this could be the answer, but I'm not sure."
        result = estimate_confidence(text)
        assert result < 0.4

    def test_output_always_in_valid_range(self):
        """Output should always be clamped to [0.0, 1.0]."""
        # Test with text that has many hedging words
        heavily_hedged = (
            "I think maybe perhaps possibly I believe this might could be approximately right."
        )
        result = estimate_confidence(heavily_hedged)
        assert 0.0 <= result <= 1.0
        # Test with text that has many certainty words
        heavily_certain = "I know definitely certainly absolutely without doubt the answer is specifically exactly correct."
        result = estimate_confidence(heavily_certain)
        assert 0.0 <= result <= 1.0

    def test_hedging_words_list_populated(self):
        """HEDGING_WORDS list should contain expected hedging phrases."""
        assert "i think" in HEDGING_WORDS
        assert "maybe" in HEDGING_WORDS
        assert "perhaps" in HEDGING_WORDS
        assert "not sure" in HEDGING_WORDS
        assert "possibly" in HEDGING_WORDS

    def test_certainty_words_list_populated(self):
        """CERTAINTY_WORDS list should contain expected certainty phrases."""
        assert "i know" in CERTAINTY_WORDS
        assert "definitely" in CERTAINTY_WORDS
        assert "certainly" in CERTAINTY_WORDS
        assert "the answer is" in CERTAINTY_WORDS

    def test_certainty_and_hedging_cancel(self):
        """Mix of certainty and hedging should balance out near base."""
        text = "I definitely think this is correct."
        result = estimate_confidence(text)
        # Should be near base (0.5) but hedging slightly stronger
        assert 0.3 <= result <= 0.7

    def test_i_have_no_idea_gives_very_low_confidence(self):
        """I have no idea should give very low confidence."""
        result = estimate_confidence("I have no idea what you're talking about.")
        assert result <= 0.2

    def test_short_response_gets_boost(self):
        """Very short factual responses should get confidence boost."""
        short = estimate_confidence("42")
        # Short factual should be higher due to boost
        assert short > 0.5