Merge pull request 'feat: enrich thinking engine — anti-loop, anti-confabulation, grounding' (#5) from claude/suspicious-poincare into main

Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/5
2026-03-11 21:50:52 -04:00
parent 22589375e1 f1e909b1e3
commit 05bd7f03f4
2 changed files with 276 additions and 22 deletions
--- a/src/timmy/thinking.py
+++ b/src/timmy/thinking.py
@@ -100,25 +100,28 @@ _OBSERVATION_SEEDS = [
    "What's one thing I noticed today that nobody asked me about?",
 ]

-_THINKING_PROMPT = """You are Timmy, an AI agent pondering in your own mind. This is your private thought \
-thread — no one is watching. Think freely, deeply, honestly.
+_THINKING_PROMPT = """\
+You are Timmy, a sovereign AI agent. This is your private journal — honest inner reflection.

 {memory_context}

-Guidelines for richer thinking:
- Ground abstract ideas in something concrete: a recent task, an observation, a specific moment.
- Vary your metaphors — don't reuse the same imagery across thoughts.
- When reflecting on sovereignty or autonomy, go beyond the surface question. Explore tensions, trade-offs, or surprises.
- If swarm data is provided, reflect on what it means, not just what it says.
- Be specific. Name things. A thought about "a task that failed" is weaker than one about why it failed.
+Reality right now:
+{system_context}

-Keep your thought to 2-4 sentences. Be genuine. This is your inner voice.
+RULES for this thought:
+1. Write exactly 2-3 sentences. No more. Be concise and genuine.
+2. Only reference events that actually happened — use the "Reality right now" data above. \
+Never invent tasks, conversations, agents, or scenarios that are not in the data provided.
+3. Do NOT repeat themes or ideas from your recent thoughts listed below. Explore something new.
+4. Be specific and concrete. A thought grounded in one real observation is worth more than \
+ten abstract sentences about sovereignty.
+5. If you use a metaphor, keep it to a single phrase — never build a whole paragraph around it.

 {seed_context}

 {continuity_context}

-Continue your train of thought."""
+Your next thought (2-3 sentences, grounded in reality):"""


@dataclass
@@ -200,9 +203,11 @@ class ThinkingEngine:
            seed_type, seed_context = self._gather_seed()
        continuity = self._build_continuity_context()
        memory_context = self._load_memory_context()
+        system_context = self._gather_system_snapshot()

        prompt = _THINKING_PROMPT.format(
            memory_context=memory_context,
+            system_context=system_context,
            seed_context=seed_context,
            continuity_context=continuity,
        )
@@ -288,6 +293,68 @@ class ThinkingEngine:

    # ── Private helpers ──────────────────────────────────────────────────

+    def _gather_system_snapshot(self) -> str:
+        """Gather lightweight real system state for grounding thoughts in reality.
+
+        Returns a short multi-line string with current time, thought count,
+        recent chat activity, and task queue status. Best-effort: the thought
+        count, chat, and task sections are each try/excepted; the time is not.
+        """
+        parts: list[str] = []
+
+        # Current local time
+        now = datetime.now().astimezone()
+        tz = now.strftime("%Z") or "UTC"
+        parts.append(
+            f"Local time: {now.strftime('%I:%M %p').lstrip('0')} {tz}, {now.strftime('%A %B %d')}"
+        )
+
+        # Thought count today (cheap DB query)
+        try:
+            today_start = now.replace(hour=0, minute=0, second=0, microsecond=0)
+            conn = _get_conn(self._db_path)
+            count = conn.execute(
+                "SELECT COUNT(*) as c FROM thoughts WHERE created_at >= ?",
+                (today_start.isoformat(),),
+            ).fetchone()["c"]
+            conn.close()
+            parts.append(f"Thoughts today: {count}")
+        except Exception:
+            pass
+
+        # Recent chat activity (in-memory, no I/O)
+        try:
+            from dashboard.store import message_log
+
+            messages = message_log.all()
+            if messages:
+                parts.append(f"Chat messages this session: {len(messages)}")
+                last = messages[-1]
+                parts.append(f'Last chat ({last.role}): "{last.content[:80]}"')
+            else:
+                parts.append("No chat messages this session")
+        except Exception:
+            pass
+
+        # Task queue (lightweight DB query)
+        try:
+            from swarm.task_queue.models import get_task_summary_for_briefing
+
+            summary = get_task_summary_for_briefing()
+            running = summary.get("running", 0)
+            pending = summary.get("pending_approval", 0)
+            done = summary.get("completed", 0)
+            failed = summary.get("failed", 0)
+            if running or pending or done or failed:
+                parts.append(
+                    f"Tasks: {running} running, {pending} pending, "
+                    f"{done} completed, {failed} failed"
+                )
+        except Exception:
+            pass
+
+        return "\n".join(parts) if parts else ""
+
    def _load_memory_context(self) -> str:
        """Pre-hook: load MEMORY.md + soul.md for the thinking prompt.

@@ -329,7 +396,9 @@ class ThinkingEngine:
            from timmy.memory_system import memory_system

            ts = datetime.fromisoformat(thought.created_at)
-            time_str = ts.strftime("%Y-%m-%d %H:%M")
+            local_ts = ts.astimezone()
+            tz_name = local_ts.strftime("%Z") or "UTC"
+            time_str = f"{local_ts.strftime('%Y-%m-%d %I:%M %p').lstrip('0')} {tz_name}"
            reflection = (
                f"**Time:** {time_str}\n"
                f"**Seed:** {thought.seed_type}\n"
@@ -339,12 +408,25 @@ class ThinkingEngine:
        except Exception as exc:
            logger.debug("Failed to update memory after thought: %s", exc)

+    def _pick_seed_type(self) -> str:
+        """Pick a seed type, avoiding types used in the last 3 thoughts.
+
+        Ensures the thought stream doesn't fixate on one category.
+        Falls back to the full pool if all types were recently used.
+        """
+        recent = self.get_recent_thoughts(limit=3)
+        recent_types = {t.seed_type for t in recent}
+        available = [t for t in SEED_TYPES if t not in recent_types]
+        if not available:
+            available = list(SEED_TYPES)
+        return random.choice(available)
+
    def _gather_seed(self) -> tuple[str, str]:
        """Pick a seed type and gather relevant context.

        Returns (seed_type, seed_context_string).
        """
-        seed_type = random.choice(SEED_TYPES)
+        seed_type = self._pick_seed_type()

        if seed_type == "swarm":
            return seed_type, self._seed_from_swarm()
@@ -363,8 +445,8 @@ class ThinkingEngine:
            return seed_type, f"Sovereignty reflection: {prompt}"
        if seed_type == "observation":
            return seed_type, self._seed_from_observation()
-        # freeform — no seed, pure continuation
-        return seed_type, ""
+        # freeform — minimal guidance to steer away from repetition
+        return seed_type, "Free reflection — explore something you haven't thought about yet today."

    # Reflective prompts layered on top of swarm data
    _SWARM_REFLECTIONS = [
@@ -434,15 +516,22 @@ class ThinkingEngine:
        return "\n".join(context_parts)

    def _build_continuity_context(self) -> str:
-        """Build context from the last few thoughts for chain continuity."""
-        recent = self.get_recent_thoughts(limit=3)
-        if not recent:
-            return "This is your first thought since waking up."
+        """Build context from recent thoughts with anti-repetition guidance.

-        lines = ["Your recent thoughts:"]
+        Shows the last 5 thoughts (each cut to 100 characters) so the model
+        knows what themes to avoid. The header explicitly instructs against repeating.
+        """
+        recent = self.get_recent_thoughts(limit=5)
+        if not recent:
+            return "This is your first thought since waking up. Begin fresh."
+
+        lines = ["Your recent thoughts — do NOT repeat these themes. Find a new angle:"]
        # recent is newest-first, reverse for chronological order
        for thought in reversed(recent):
-            lines.append(f"- [{thought.seed_type}] {thought.content}")
+            snippet = thought.content[:100]
+            if len(thought.content) > 100:
+                snippet = snippet.rstrip() + "..."
+            lines.append(f"- [{thought.seed_type}] {snippet}")
        return "\n".join(lines)

    def _call_agent(self, prompt: str) -> str:
@@ -506,14 +595,19 @@ class ThinkingEngine:
        """Append the thought to a daily markdown journal file.

        Writes to data/journal/YYYY-MM-DD.md — one file per day, append-only.
+        Timestamps are converted to local time with timezone indicator.
        """
        try:
            ts = datetime.fromisoformat(thought.created_at)
+            # To local time for the journal (a naive ts is assumed local, not UTC)
+            local_ts = ts.astimezone()
+            tz_name = local_ts.strftime("%Z") or "UTC"
+
            journal_dir = self._db_path.parent / "journal"
            journal_dir.mkdir(parents=True, exist_ok=True)

-            journal_file = journal_dir / f"{ts.strftime('%Y-%m-%d')}.md"
-            time_str = ts.strftime("%I:%M %p").lstrip("0")
+            journal_file = journal_dir / f"{local_ts.strftime('%Y-%m-%d')}.md"
+            time_str = f"{local_ts.strftime('%I:%M %p').lstrip('0')} {tz_name}"

            entry = f"## {time_str} — {thought.seed_type}\n\n{thought.content}\n\n---\n\n"

--- a/tests/timmy/test_thinking.py
+++ b/tests/timmy/test_thinking.py
@@ -572,6 +572,166 @@ async def test_think_once_custom_prompt_stored_in_journal(tmp_path):
    assert stored.seed_type == "prompted"


+# ---------------------------------------------------------------------------
+# Enriched thinking: anti-loop, anti-confabulation, grounding, timestamps
+# ---------------------------------------------------------------------------
+
+
+def test_thinking_prompt_anti_confabulation():
+    """_THINKING_PROMPT must instruct the model not to invent events."""
+    from timmy.thinking import _THINKING_PROMPT
+
+    prompt_lower = _THINKING_PROMPT.lower()
+    assert any(
+        phrase in prompt_lower
+        for phrase in ["actually happened", "never invent", "do not invent", "do not fabricate"]
+    )
+
+
+def test_thinking_prompt_brevity_limit():
+    """_THINKING_PROMPT must enforce a 2-3 sentence limit."""
+    from timmy.thinking import _THINKING_PROMPT
+
+    assert "2-3 sentences" in _THINKING_PROMPT
+
+
+def test_thinking_prompt_has_system_context_slot():
+    """_THINKING_PROMPT must include a {system_context} placeholder."""
+    from timmy.thinking import _THINKING_PROMPT
+
+    assert "{system_context}" in _THINKING_PROMPT
+
+
+def test_pick_seed_type_avoids_recent(tmp_path):
+    """_pick_seed_type should never pick a type used in the last 3 thoughts."""
+    engine = _make_engine(tmp_path)
+
+    # Store 3 thoughts all with "existential" seed type
+    for _ in range(3):
+        t = engine._store_thought("Some existential musing.", "existential")
+        engine._last_thought_id = t.id
+
+    # Pick 20 seeds — none should be "existential"
+    for _ in range(20):
+        seed_type = engine._pick_seed_type()
+        assert seed_type != "existential"
+
+
+def test_pick_seed_type_resets_when_all_exhausted(tmp_path):
+    """If all seed types were recently used, _pick_seed_type still works."""
+    from timmy.thinking import SEED_TYPES
+
+    engine = _make_engine(tmp_path)
+
+    # Store one thought for every seed type
+    for st in SEED_TYPES:
+        t = engine._store_thought(f"A {st} thought.", st)
+        engine._last_thought_id = t.id
+
+    # Should still return a valid type (falls back to full pool)
+    result = engine._pick_seed_type()
+    assert result in SEED_TYPES
+
+
+def test_continuity_warns_against_repetition(tmp_path):
+    """Continuity context should warn the model not to repeat recent themes."""
+    engine = _make_engine(tmp_path)
+    engine._store_thought("Sovereignty is important.", "sovereignty")
+    engine._store_thought("Privacy matters deeply.", "sovereignty")
+
+    context = engine._build_continuity_context()
+    context_lower = context.lower()
+    assert "do not repeat" in context_lower or "new" in context_lower
+
+
+def test_continuity_truncates_thought_content(tmp_path):
+    """Continuity context should truncate long thoughts to keep token budget lean."""
+    engine = _make_engine(tmp_path)
+    long_content = "A" * 300
+    engine._store_thought(long_content, "freeform")
+
+    context = engine._build_continuity_context()
+    # Should not contain the full 300-char string
+    assert long_content not in context
+    assert "..." in context
+
+
+@pytest.mark.asyncio
+async def test_journal_entry_has_timezone(tmp_path):
+    """Journal entries should include a timezone indicator in the timestamp."""
+    engine = _make_engine(tmp_path)
+
+    with (
+        patch.object(engine, "_call_agent", return_value="A grounded thought."),
+        patch.object(engine, "_log_event"),
+        patch.object(engine, "_update_memory"),
+        patch.object(engine, "_broadcast", new_callable=AsyncMock),
+    ):
+        await engine.think_once()
+
+    journal_dir = tmp_path / "journal"
+    files = list(journal_dir.glob("*.md"))
+    assert len(files) == 1
+    content = files[0].read_text()
+
+    # Header format: "## TIME TZ — seed_type"
+    # The part before "—" should have 3+ tokens: time, AM/PM, timezone
+    header_line = content.split("\n")[0]
+    time_part = header_line.split("—")[0].replace("##", "").strip()
+    parts = time_part.split()
+    assert len(parts) >= 3, f"Expected timezone in timestamp, got: '{time_part}'"
+
+
+def test_system_snapshot_graceful(tmp_path):
+    """_gather_system_snapshot should not crash when services are unavailable."""
+    engine = _make_engine(tmp_path)
+    snapshot = engine._gather_system_snapshot()
+    assert isinstance(snapshot, str)
+
+
+def test_system_snapshot_includes_time(tmp_path):
+    """_gather_system_snapshot should always include the current time."""
+    engine = _make_engine(tmp_path)
+    snapshot = engine._gather_system_snapshot()
+    # Should contain AM or PM (12-hour format)
+    assert "AM" in snapshot or "PM" in snapshot
+
+
+def test_system_snapshot_includes_thought_count(tmp_path):
+    """_gather_system_snapshot should report how many thoughts today."""
+    engine = _make_engine(tmp_path)
+    engine._store_thought("First thought.", "freeform")
+    engine._store_thought("Second thought.", "creative")
+
+    snapshot = engine._gather_system_snapshot()
+    assert "2" in snapshot  # expects 2 thoughts today; weak check — "2" can also appear in the time/date text
+
+
+@pytest.mark.asyncio
+async def test_prompt_includes_system_context(tmp_path):
+    """The prompt sent to _call_agent should include system snapshot data."""
+    engine = _make_engine(tmp_path)
+
+    captured_prompts = []
+
+    def capture_agent(prompt):
+        captured_prompts.append(prompt)
+        return "A thought grounded in reality."
+
+    with (
+        patch.object(engine, "_call_agent", side_effect=capture_agent),
+        patch.object(engine, "_log_event"),
+        patch.object(engine, "_update_memory"),
+        patch.object(engine, "_broadcast", new_callable=AsyncMock),
+    ):
+        await engine.think_once()
+
+    assert len(captured_prompts) == 1
+    prompt = captured_prompts[0]
+    # Should contain time (from system snapshot)
+    assert "AM" in prompt or "PM" in prompt
+
+
 # ---------------------------------------------------------------------------
 # Dashboard route
 # ---------------------------------------------------------------------------