Merge pull request 'feat: enrich thinking engine — anti-loop, anti-confabulation, grounding' (#5) from claude/suspicious-poincare into main

Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/5
2026-03-11 21:50:52 -04:00
parent 22589375e1 f1e909b1e3
commit 05bd7f03f4
2 changed files with 276 additions and 22 deletions
--- a/src/timmy/thinking.py
+++ b/src/timmy/thinking.py
@@ -100,25 +100,28 @@ _OBSERVATION_SEEDS = [
    "What's one thing I noticed today that nobody asked me about?",
 ]
-_THINKING_PROMPT = """You are Timmy, an AI agent pondering in your own mind. This is your private thought \
+_THINKING_PROMPT = """\
-thread — no one is watching. Think freely, deeply, honestly.
+You are Timmy, a sovereign AI agent. This is your private journal — honest inner reflection.
 {memory_context}
-Guidelines for richer thinking:
+Reality right now:
- Ground abstract ideas in something concrete: a recent task, an observation, a specific moment.
+{system_context}
 - Vary your metaphors — don't reuse the same imagery across thoughts.
 - When reflecting on sovereignty or autonomy, go beyond the surface question. Explore tensions, trade-offs, or surprises.
 - If swarm data is provided, reflect on what it means, not just what it says.
 - Be specific. Name things. A thought about "a task that failed" is weaker than one about why it failed.
-Keep your thought to 2-4 sentences. Be genuine. This is your inner voice.
+RULES for this thought:
 1. Write exactly 2-3 sentences. No more. Be concise and genuine.
 2. Only reference events that actually happened — use the "Reality right now" data above. \
 Never invent tasks, conversations, agents, or scenarios that are not in the data provided.
 3. Do NOT repeat themes or ideas from your recent thoughts listed below. Explore something new.
 4. Be specific and concrete. A thought grounded in one real observation is worth more than \
 ten abstract sentences about sovereignty.
 5. If you use a metaphor, keep it to a single phrase — never build a whole paragraph around it.
 {seed_context}
 {continuity_context}
-Continue your train of thought."""
+Your next thought (2-3 sentences, grounded in reality):"""
@dataclass
@@ -200,9 +203,11 @@ class ThinkingEngine:
            seed_type, seed_context = self._gather_seed()
        continuity = self._build_continuity_context()
        memory_context = self._load_memory_context()
        system_context = self._gather_system_snapshot()
        prompt = _THINKING_PROMPT.format(
            memory_context=memory_context,
            system_context=system_context,
            seed_context=seed_context,
            continuity_context=continuity,
        )
@@ -288,6 +293,68 @@ class ThinkingEngine:
    # ── Private helpers ──────────────────────────────────────────────────
    def _gather_system_snapshot(self) -> str:
        """Gather lightweight real system state for grounding thoughts in reality.

        Builds a short multi-line string from up to four sections: current
        local time, number of thoughts stored today, recent chat activity, and
        task-queue counts.  Every optional section runs in its own
        ``try``/``except`` so a failing data source only drops that section
        (logged at debug level) instead of aborting the thought.

        Returns:
            The collected lines joined with newlines.
        """
        from datetime import timezone

        parts: list[str] = []

        # Current local time — always present.
        now = datetime.now().astimezone()
        tz = now.strftime("%Z") or "UTC"
        parts.append(
            f"Local time: {now.strftime('%I:%M %p').lstrip('0')} {tz}, {now.strftime('%A %B %d')}"
        )

        # Thought count today (cheap DB query)
        try:
            today_start = now.replace(hour=0, minute=0, second=0, microsecond=0)
            # The query compares ISO strings textually, so the cutoff must use
            # the same offset as the stored values.  The journal writer in this
            # class treats created_at as UTC, hence the conversion.
            # NOTE(review): confirm created_at is always stored in UTC.
            cutoff = today_start.astimezone(timezone.utc).isoformat()
            conn = _get_conn(self._db_path)
            try:
                count = conn.execute(
                    "SELECT COUNT(*) as c FROM thoughts WHERE created_at >= ?",
                    (cutoff,),
                ).fetchone()["c"]
            finally:
                # Release the connection even when the query itself fails.
                conn.close()
            parts.append(f"Thoughts today: {count}")
        except Exception as exc:
            logger.debug("System snapshot: thought count unavailable: %s", exc)

        # Recent chat activity (in-memory, no I/O)
        try:
            from dashboard.store import message_log

            messages = message_log.all()
            if messages:
                parts.append(f"Chat messages this session: {len(messages)}")
                last = messages[-1]
                # Only the first 80 characters are quoted to keep the prompt short.
                parts.append(f'Last chat ({last.role}): "{last.content[:80]}"')
            else:
                parts.append("No chat messages this session")
        except Exception as exc:
            logger.debug("System snapshot: chat activity unavailable: %s", exc)

        # Task queue (lightweight DB query)
        try:
            from swarm.task_queue.models import get_task_summary_for_briefing

            summary = get_task_summary_for_briefing()
            running = summary.get("running", 0)
            pending = summary.get("pending_approval", 0)
            done = summary.get("completed", 0)
            failed = summary.get("failed", 0)
            # An all-zero queue carries no information, so the line is omitted.
            if running or pending or done or failed:
                parts.append(
                    f"Tasks: {running} running, {pending} pending, "
                    f"{done} completed, {failed} failed"
                )
        except Exception as exc:
            logger.debug("System snapshot: task summary unavailable: %s", exc)

        return "\n".join(parts)
    def _load_memory_context(self) -> str:
        """Pre-hook: load MEMORY.md + soul.md for the thinking prompt.
@@ -329,7 +396,9 @@ class ThinkingEngine:
            from timmy.memory_system import memory_system
            ts = datetime.fromisoformat(thought.created_at)
-            time_str = ts.strftime("%Y-%m-%d %H:%M")
+            local_ts = ts.astimezone()
            tz_name = local_ts.strftime("%Z") or "UTC"
            time_str = f"{local_ts.strftime('%Y-%m-%d %I:%M %p').lstrip('0')} {tz_name}"
            reflection = (
                f"**Time:** {time_str}\n"
                f"**Seed:** {thought.seed_type}\n"
@@ -339,12 +408,25 @@ class ThinkingEngine:
        except Exception as exc:
            logger.debug("Failed to update memory after thought: %s", exc)
    def _pick_seed_type(self) -> str:
        """Choose a seed type that the three most recent thoughts did not use.

        Keeps the thought stream from fixating on a single category.  When
        every seed type appears among those recent thoughts, the choice is
        made from the complete ``SEED_TYPES`` pool instead.
        """
        recently_used = {thought.seed_type for thought in self.get_recent_thoughts(limit=3)}
        candidates = [seed for seed in SEED_TYPES if seed not in recently_used]
        # Nothing left after filtering: fall back to the full pool.
        return random.choice(candidates or list(SEED_TYPES))
    def _gather_seed(self) -> tuple[str, str]:
        """Pick a seed type and gather relevant context.
        Returns (seed_type, seed_context_string).
        """
-        seed_type = random.choice(SEED_TYPES)
+        seed_type = self._pick_seed_type()
        if seed_type == "swarm":
            return seed_type, self._seed_from_swarm()
@@ -363,8 +445,8 @@ class ThinkingEngine:
            return seed_type, f"Sovereignty reflection: {prompt}"
        if seed_type == "observation":
            return seed_type, self._seed_from_observation()
-        # freeform — no seed, pure continuation
+        # freeform — minimal guidance to steer away from repetition
-        return seed_type, ""
+        return seed_type, "Free reflection — explore something you haven't thought about yet today."
    # Reflective prompts layered on top of swarm data
    _SWARM_REFLECTIONS = [
@@ -434,15 +516,22 @@ class ThinkingEngine:
        return "\n".join(context_parts)
    def _build_continuity_context(self) -> str:
-        """Build context from the last few thoughts for chain continuity."""
+        """Build context from recent thoughts with anti-repetition guidance.
        recent = self.get_recent_thoughts(limit=3)
        if not recent:
            return "This is your first thought since waking up."
-        lines = ["Your recent thoughts:"]
+        Shows the last 5 thoughts (truncated) so the model knows what themes
        to avoid.  The header explicitly instructs against repeating.
        """
        recent = self.get_recent_thoughts(limit=5)
        if not recent:
            return "This is your first thought since waking up. Begin fresh."
        lines = ["Your recent thoughts — do NOT repeat these themes. Find a new angle:"]
        # recent is newest-first, reverse for chronological order
        for thought in reversed(recent):
-            lines.append(f"- [{thought.seed_type}] {thought.content}")
+            snippet = thought.content[:100]
            if len(thought.content) > 100:
                snippet = snippet.rstrip() + "..."
            lines.append(f"- [{thought.seed_type}] {snippet}")
        return "\n".join(lines)
    def _call_agent(self, prompt: str) -> str:
@@ -506,14 +595,19 @@ class ThinkingEngine:
        """Append the thought to a daily markdown journal file.
        Writes to data/journal/YYYY-MM-DD.md — one file per day, append-only.
        Timestamps are converted to local time with timezone indicator.
        """
        try:
            ts = datetime.fromisoformat(thought.created_at)
            # Convert UTC to local for a human-readable journal
            local_ts = ts.astimezone()
            tz_name = local_ts.strftime("%Z") or "UTC"
            journal_dir = self._db_path.parent / "journal"
            journal_dir.mkdir(parents=True, exist_ok=True)
-            journal_file = journal_dir / f"{ts.strftime('%Y-%m-%d')}.md"
+            journal_file = journal_dir / f"{local_ts.strftime('%Y-%m-%d')}.md"
-            time_str = ts.strftime("%I:%M %p").lstrip("0")
+            time_str = f"{local_ts.strftime('%I:%M %p').lstrip('0')} {tz_name}"
            entry = f"## {time_str} — {thought.seed_type}\n\n{thought.content}\n\n---\n\n"
--- a/tests/timmy/test_thinking.py
+++ b/tests/timmy/test_thinking.py
@@ -572,6 +572,166 @@ async def test_think_once_custom_prompt_stored_in_journal(tmp_path):
    assert stored.seed_type == "prompted"
 # ---------------------------------------------------------------------------
 # Enriched thinking: anti-loop, anti-confabulation, grounding, timestamps
 # ---------------------------------------------------------------------------
 def test_thinking_prompt_anti_confabulation():
    """The thinking prompt has to tell the model not to invent events."""
    from timmy.thinking import _THINKING_PROMPT

    lowered = _THINKING_PROMPT.lower()
    # Any one of these wordings counts as an anti-confabulation instruction.
    accepted_phrases = ("actually happened", "never invent", "do not invent", "do not fabricate")
    found = [phrase for phrase in accepted_phrases if phrase in lowered]
    assert found
 def test_thinking_prompt_brevity_limit():
    """The thinking prompt has to spell out the 2-3 sentence cap."""
    from timmy.thinking import _THINKING_PROMPT

    required_wording = "2-3 sentences"
    assert required_wording in _THINKING_PROMPT
 def test_thinking_prompt_has_system_context_slot():
    """The thinking prompt has to expose a {system_context} format field."""
    from timmy.thinking import _THINKING_PROMPT

    placeholder = "{system_context}"
    assert placeholder in _THINKING_PROMPT
 def test_pick_seed_type_avoids_recent(tmp_path):
    """A seed type used by the last 3 thoughts must not be picked again."""
    engine = _make_engine(tmp_path)
    blocked = "existential"

    # Fill the recent-history window with a single seed type.
    for _ in range(3):
        stored = engine._store_thought("Some existential musing.", blocked)
        engine._last_thought_id = stored.id

    # Draw 20 times; the blocked type must never come up.
    picks = [engine._pick_seed_type() for _ in range(20)]
    assert blocked not in picks
 def test_pick_seed_type_resets_when_all_exhausted(tmp_path):
    """_pick_seed_type keeps working once every seed type has a stored thought."""
    from timmy.thinking import SEED_TYPES

    engine = _make_engine(tmp_path)

    # One stored thought per seed type.
    for seed_type in SEED_TYPES:
        thought = engine._store_thought(f"A {seed_type} thought.", seed_type)
        engine._last_thought_id = thought.id

    # Whatever comes back must still be a known seed type.
    assert engine._pick_seed_type() in SEED_TYPES
 def test_continuity_warns_against_repetition(tmp_path):
    """The continuity context must steer the model away from recent themes."""
    engine = _make_engine(tmp_path)
    for text in ("Sovereignty is important.", "Privacy matters deeply."):
        engine._store_thought(text, "sovereignty")

    lowered = engine._build_continuity_context().lower()
    # Either wording is accepted as an anti-repetition hint.
    assert any(marker in lowered for marker in ("do not repeat", "new"))
 def test_continuity_truncates_thought_content(tmp_path):
    """Long thoughts must be shortened in the continuity context."""
    engine = _make_engine(tmp_path)
    oversized = "A" * 300
    engine._store_thought(oversized, "freeform")

    rendered = engine._build_continuity_context()

    # The full 300-character text must be gone, replaced by an ellipsis marker.
    assert oversized not in rendered
    assert "..." in rendered
@pytest.mark.asyncio
 async def test_journal_entry_has_timezone(tmp_path):
    """Journal entries should include a timezone indicator in the timestamp."""
    engine = _make_engine(tmp_path)
    with (
        patch.object(engine, "_call_agent", return_value="A grounded thought."),
        patch.object(engine, "_log_event"),
        patch.object(engine, "_update_memory"),
        patch.object(engine, "_broadcast", new_callable=AsyncMock),
    ):
        await engine.think_once()
    journal_dir = tmp_path / "journal"
    files = list(journal_dir.glob("*.md"))
    assert len(files) == 1
    content = files[0].read_text()
    # Header format: "## TIME TZ — seed_type"
    # The part before "—" should have 3+ tokens: time, AM/PM, timezone
    header_line = content.split("\n")[0]
    time_part = header_line.split("—")[0].replace("##", "").strip()
    parts = time_part.split()
    assert len(parts) >= 3, f"Expected timezone in timestamp, got: '{time_part}'"
 def test_system_snapshot_graceful(tmp_path):
    """_gather_system_snapshot must return a string even with services missing."""
    result = _make_engine(tmp_path)._gather_system_snapshot()
    assert isinstance(result, str)
 def test_system_snapshot_includes_time(tmp_path):
    """_gather_system_snapshot must always report the current time."""
    snapshot = _make_engine(tmp_path)._gather_system_snapshot()
    # The clock is rendered in 12-hour format, so one of these must appear.
    meridiem_markers = ("AM", "PM")
    assert any(marker in snapshot for marker in meridiem_markers)
 def test_system_snapshot_includes_thought_count(tmp_path):
    """_gather_system_snapshot should report how many thoughts were stored today."""
    engine = _make_engine(tmp_path)
    engine._store_thought("First thought.", "freeform")
    engine._store_thought("Second thought.", "creative")
    snapshot = engine._gather_system_snapshot()
    # Assert on the full labelled line: a bare "2" also shows up in most clock
    # times and dates in the snapshot, which would let the check pass vacuously.
    assert "Thoughts today: 2" in snapshot
@pytest.mark.asyncio
 async def test_prompt_includes_system_context(tmp_path):
    """The prompt sent to _call_agent should include system snapshot data."""
    engine = _make_engine(tmp_path)
    captured_prompts = []
    def capture_agent(prompt):
        captured_prompts.append(prompt)
        return "A thought grounded in reality."
    with (
        patch.object(engine, "_call_agent", side_effect=capture_agent),
        patch.object(engine, "_log_event"),
        patch.object(engine, "_update_memory"),
        patch.object(engine, "_broadcast", new_callable=AsyncMock),
    ):
        await engine.think_once()
    assert len(captured_prompts) == 1
    prompt = captured_prompts[0]
    # Should contain time (from system snapshot)
    assert "AM" in prompt or "PM" in prompt
 # ---------------------------------------------------------------------------
 # Dashboard route
 # ---------------------------------------------------------------------------