From f1e909b1e3d8268f98595a4904644a291854f396 Mon Sep 17 00:00:00 2001 From: Trip T Date: Wed, 11 Mar 2026 21:43:10 -0400 Subject: [PATCH] =?UTF-8?q?feat:=20enrich=20thinking=20engine=20=E2=80=94?= =?UTF-8?q?=20anti-loop,=20anti-confabulation,=20grounding?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rewrite _THINKING_PROMPT with strict rules: 2-3 sentence limit, anti-confabulation (only reference real data), anti-repetition. - Add _pick_seed_type() with recent-type dedup (excludes last 3) - Add _gather_system_snapshot() for real-time grounding (time, thought count, chat activity, task queue) - Improve _build_continuity_context() with anti-repetition header and 100-char truncation - Fix journal + memory timestamps to include local timezone - 12 new TDD tests covering all improvements Co-Authored-By: Claude Opus 4.6 --- src/timmy/thinking.py | 138 +++++++++++++++++++++++++----- tests/timmy/test_thinking.py | 160 +++++++++++++++++++++++++++++++++++ 2 files changed, 276 insertions(+), 22 deletions(-) diff --git a/src/timmy/thinking.py b/src/timmy/thinking.py index d8de233..83c003c 100644 --- a/src/timmy/thinking.py +++ b/src/timmy/thinking.py @@ -100,25 +100,28 @@ _OBSERVATION_SEEDS = [ "What's one thing I noticed today that nobody asked me about?", ] -_THINKING_PROMPT = """You are Timmy, an AI agent pondering in your own mind. This is your private thought \ -thread — no one is watching. Think freely, deeply, honestly. +_THINKING_PROMPT = """\ +You are Timmy, a sovereign AI agent. This is your private journal — honest inner reflection. {memory_context} -Guidelines for richer thinking: -- Ground abstract ideas in something concrete: a recent task, an observation, a specific moment. -- Vary your metaphors — don't reuse the same imagery across thoughts. -- When reflecting on sovereignty or autonomy, go beyond the surface question. Explore tensions, trade-offs, or surprises. -- If swarm data is provided, reflect on what it means, not just what it says. -- Be specific. Name things. A thought about "a task that failed" is weaker than one about why it failed. +Reality right now: +{system_context} -Keep your thought to 2-4 sentences. Be genuine. This is your inner voice. +RULES for this thought: +1. Write exactly 2-3 sentences. No more. Be concise and genuine. +2. Only reference events that actually happened — use the "Reality right now" data above. \ +Never invent tasks, conversations, agents, or scenarios that are not in the data provided. +3. Do NOT repeat themes or ideas from your recent thoughts listed below. Explore something new. +4. Be specific and concrete. A thought grounded in one real observation is worth more than \ +ten abstract sentences about sovereignty. +5. If you use a metaphor, keep it to a single phrase — never build a whole paragraph around it. {seed_context} {continuity_context} -Continue your train of thought.""" +Your next thought (2-3 sentences, grounded in reality):""" @dataclass @@ -200,9 +203,11 @@ class ThinkingEngine: seed_type, seed_context = self._gather_seed() continuity = self._build_continuity_context() memory_context = self._load_memory_context() + system_context = self._gather_system_snapshot() prompt = _THINKING_PROMPT.format( memory_context=memory_context, + system_context=system_context, seed_context=seed_context, continuity_context=continuity, ) @@ -288,6 +293,68 @@ class ThinkingEngine: # ── Private helpers ────────────────────────────────────────────────── + def _gather_system_snapshot(self) -> str: + """Gather lightweight real system state for grounding thoughts in reality. + + Returns a short multi-line string with current time, thought count, + recent chat activity, and task queue status. Never crashes — every + section is independently try/excepted. + """ + parts: list[str] = [] + + # Current local time + now = datetime.now().astimezone() + tz = now.strftime("%Z") or "UTC" + parts.append( + f"Local time: {now.strftime('%I:%M %p').lstrip('0')} {tz}, {now.strftime('%A %B %d')}" + ) + + # Thought count today (cheap DB query) + try: + today_start = now.replace(hour=0, minute=0, second=0, microsecond=0) + conn = _get_conn(self._db_path) + count = conn.execute( + "SELECT COUNT(*) as c FROM thoughts WHERE created_at >= ?", + (today_start.isoformat(),), + ).fetchone()["c"] + conn.close() + parts.append(f"Thoughts today: {count}") + except Exception: + pass + + # Recent chat activity (in-memory, no I/O) + try: + from dashboard.store import message_log + + messages = message_log.all() + if messages: + parts.append(f"Chat messages this session: {len(messages)}") + last = messages[-1] + parts.append(f'Last chat ({last.role}): "{last.content[:80]}"') + else: + parts.append("No chat messages this session") + except Exception: + pass + + # Task queue (lightweight DB query) + try: + from swarm.task_queue.models import get_task_summary_for_briefing + + summary = get_task_summary_for_briefing() + running = summary.get("running", 0) + pending = summary.get("pending_approval", 0) + done = summary.get("completed", 0) + failed = summary.get("failed", 0) + if running or pending or done or failed: + parts.append( + f"Tasks: {running} running, {pending} pending, " + f"{done} completed, {failed} failed" + ) + except Exception: + pass + + return "\n".join(parts) if parts else "" + def _load_memory_context(self) -> str: """Pre-hook: load MEMORY.md + soul.md for the thinking prompt. @@ -329,7 +396,9 @@ class ThinkingEngine: from timmy.memory_system import memory_system ts = datetime.fromisoformat(thought.created_at) - time_str = ts.strftime("%Y-%m-%d %H:%M") + local_ts = ts.astimezone() + tz_name = local_ts.strftime("%Z") or "UTC" + time_str = f"{local_ts.strftime('%Y-%m-%d %I:%M %p').lstrip('0')} {tz_name}" reflection = ( f"**Time:** {time_str}\n" f"**Seed:** {thought.seed_type}\n" @@ -339,12 +408,25 @@ class ThinkingEngine: except Exception as exc: logger.debug("Failed to update memory after thought: %s", exc) + def _pick_seed_type(self) -> str: + """Pick a seed type, avoiding types used in the last 3 thoughts. + + Ensures the thought stream doesn't fixate on one category. + Falls back to the full pool if all types were recently used. + """ + recent = self.get_recent_thoughts(limit=3) + recent_types = {t.seed_type for t in recent} + available = [t for t in SEED_TYPES if t not in recent_types] + if not available: + available = list(SEED_TYPES) + return random.choice(available) + def _gather_seed(self) -> tuple[str, str]: """Pick a seed type and gather relevant context. Returns (seed_type, seed_context_string). """ - seed_type = random.choice(SEED_TYPES) + seed_type = self._pick_seed_type() if seed_type == "swarm": return seed_type, self._seed_from_swarm() @@ -363,8 +445,8 @@ class ThinkingEngine: return seed_type, f"Sovereignty reflection: {prompt}" if seed_type == "observation": return seed_type, self._seed_from_observation() - # freeform — no seed, pure continuation - return seed_type, "" + # freeform — minimal guidance to steer away from repetition + return seed_type, "Free reflection — explore something you haven't thought about yet today." # Reflective prompts layered on top of swarm data _SWARM_REFLECTIONS = [ @@ -434,15 +516,22 @@ class ThinkingEngine: return "\n".join(context_parts) def _build_continuity_context(self) -> str: - """Build context from the last few thoughts for chain continuity.""" - recent = self.get_recent_thoughts(limit=3) - if not recent: - return "This is your first thought since waking up." + """Build context from recent thoughts with anti-repetition guidance. - lines = ["Your recent thoughts:"] + Shows the last 5 thoughts (truncated) so the model knows what themes + to avoid. The header explicitly instructs against repeating. + """ + recent = self.get_recent_thoughts(limit=5) + if not recent: + return "This is your first thought since waking up. Begin fresh." + + lines = ["Your recent thoughts — do NOT repeat these themes. Find a new angle:"] # recent is newest-first, reverse for chronological order for thought in reversed(recent): - lines.append(f"- [{thought.seed_type}] {thought.content}") + snippet = thought.content[:100] + if len(thought.content) > 100: + snippet = snippet.rstrip() + "..." + lines.append(f"- [{thought.seed_type}] {snippet}") return "\n".join(lines) def _call_agent(self, prompt: str) -> str: @@ -506,14 +595,19 @@ class ThinkingEngine: """Append the thought to a daily markdown journal file. Writes to data/journal/YYYY-MM-DD.md — one file per day, append-only. + Timestamps are converted to local time with timezone indicator. """ try: ts = datetime.fromisoformat(thought.created_at) + # Convert UTC to local for a human-readable journal + local_ts = ts.astimezone() + tz_name = local_ts.strftime("%Z") or "UTC" + journal_dir = self._db_path.parent / "journal" journal_dir.mkdir(parents=True, exist_ok=True) - journal_file = journal_dir / f"{ts.strftime('%Y-%m-%d')}.md" - time_str = ts.strftime("%I:%M %p").lstrip("0") + journal_file = journal_dir / f"{local_ts.strftime('%Y-%m-%d')}.md" + time_str = f"{local_ts.strftime('%I:%M %p').lstrip('0')} {tz_name}" entry = f"## {time_str} — {thought.seed_type}\n\n{thought.content}\n\n---\n\n" diff --git a/tests/timmy/test_thinking.py b/tests/timmy/test_thinking.py index 3826269..d441847 100644 --- a/tests/timmy/test_thinking.py +++ b/tests/timmy/test_thinking.py @@ -572,6 +572,166 @@ async def test_think_once_custom_prompt_stored_in_journal(tmp_path): assert stored.seed_type == "prompted" +# --------------------------------------------------------------------------- +# Enriched thinking: anti-loop, anti-confabulation, grounding, timestamps +# --------------------------------------------------------------------------- + + +def test_thinking_prompt_anti_confabulation(): + """_THINKING_PROMPT must instruct the model not to invent events.""" + from timmy.thinking import _THINKING_PROMPT + + prompt_lower = _THINKING_PROMPT.lower() + assert any( + phrase in prompt_lower + for phrase in ["actually happened", "never invent", "do not invent", "do not fabricate"] + ) + + +def test_thinking_prompt_brevity_limit(): + """_THINKING_PROMPT must enforce a 2-3 sentence limit.""" + from timmy.thinking import _THINKING_PROMPT + + assert "2-3 sentences" in _THINKING_PROMPT + + +def test_thinking_prompt_has_system_context_slot(): + """_THINKING_PROMPT must include a {system_context} placeholder.""" + from timmy.thinking import _THINKING_PROMPT + + assert "{system_context}" in _THINKING_PROMPT + + +def test_pick_seed_type_avoids_recent(tmp_path): + """_pick_seed_type should never pick a type used in the last 3 thoughts.""" + engine = _make_engine(tmp_path) + + # Store 3 thoughts all with "existential" seed type + for _ in range(3): + t = engine._store_thought("Some existential musing.", "existential") + engine._last_thought_id = t.id + + # Pick 20 seeds — none should be "existential" + for _ in range(20): + seed_type = engine._pick_seed_type() + assert seed_type != "existential" + + +def test_pick_seed_type_resets_when_all_exhausted(tmp_path): + """If all seed types were recently used, _pick_seed_type still works.""" + from timmy.thinking import SEED_TYPES + + engine = _make_engine(tmp_path) + + # Store one thought for every seed type + for st in SEED_TYPES: + t = engine._store_thought(f"A {st} thought.", st) + engine._last_thought_id = t.id + + # Should still return a valid type (falls back to full pool) + result = engine._pick_seed_type() + assert result in SEED_TYPES + + +def test_continuity_warns_against_repetition(tmp_path): + """Continuity context should warn the model not to repeat recent themes.""" + engine = _make_engine(tmp_path) + engine._store_thought("Sovereignty is important.", "sovereignty") + engine._store_thought("Privacy matters deeply.", "sovereignty") + + context = engine._build_continuity_context() + context_lower = context.lower() + assert "do not repeat" in context_lower or "new" in context_lower + + +def test_continuity_truncates_thought_content(tmp_path): + """Continuity context should truncate long thoughts to keep token budget lean.""" + engine = _make_engine(tmp_path) + long_content = "A" * 300 + engine._store_thought(long_content, "freeform") + + context = engine._build_continuity_context() + # Should not contain the full 300-char string + assert long_content not in context + assert "..." in context + + +@pytest.mark.asyncio +async def test_journal_entry_has_timezone(tmp_path): + """Journal entries should include a timezone indicator in the timestamp.""" + engine = _make_engine(tmp_path) + + with ( + patch.object(engine, "_call_agent", return_value="A grounded thought."), + patch.object(engine, "_log_event"), + patch.object(engine, "_update_memory"), + patch.object(engine, "_broadcast", new_callable=AsyncMock), + ): + await engine.think_once() + + journal_dir = tmp_path / "journal" + files = list(journal_dir.glob("*.md")) + assert len(files) == 1 + content = files[0].read_text() + + # Header format: "## TIME TZ — seed_type" + # The part before "—" should have 3+ tokens: time, AM/PM, timezone + header_line = content.split("\n")[0] + time_part = header_line.split("—")[0].replace("##", "").strip() + parts = time_part.split() + assert len(parts) >= 3, f"Expected timezone in timestamp, got: '{time_part}'" + + +def test_system_snapshot_graceful(tmp_path): + """_gather_system_snapshot should not crash when services are unavailable.""" + engine = _make_engine(tmp_path) + snapshot = engine._gather_system_snapshot() + assert isinstance(snapshot, str) + + +def test_system_snapshot_includes_time(tmp_path): + """_gather_system_snapshot should always include the current time.""" + engine = _make_engine(tmp_path) + snapshot = engine._gather_system_snapshot() + # Should contain AM or PM (12-hour format) + assert "AM" in snapshot or "PM" in snapshot + + +def test_system_snapshot_includes_thought_count(tmp_path): + """_gather_system_snapshot should report how many thoughts today.""" + engine = _make_engine(tmp_path) + engine._store_thought("First thought.", "freeform") + engine._store_thought("Second thought.", "creative") + + snapshot = engine._gather_system_snapshot() + assert "2" in snapshot # 2 thoughts today + + +@pytest.mark.asyncio +async def test_prompt_includes_system_context(tmp_path): + """The prompt sent to _call_agent should include system snapshot data.""" + engine = _make_engine(tmp_path) + + captured_prompts = [] + + def capture_agent(prompt): + captured_prompts.append(prompt) + return "A thought grounded in reality." + + with ( + patch.object(engine, "_call_agent", side_effect=capture_agent), + patch.object(engine, "_log_event"), + patch.object(engine, "_update_memory"), + patch.object(engine, "_broadcast", new_callable=AsyncMock), + ): + await engine.think_once() + + assert len(captured_prompts) == 1 + prompt = captured_prompts[0] + # Should contain time (from system snapshot) + assert "AM" in prompt or "PM" in prompt + + # --------------------------------------------------------------------------- # Dashboard route # ---------------------------------------------------------------------------