feat: enrich thinking engine — anti-loop, anti-confabulation, grounding

Rewrite _THINKING_PROMPT with strict rules: 2-3 sentence limit, anti-confabulation (only reference real data), anti-repetition.

- Add _pick_seed_type() with recent-type dedup (excludes last 3)
- Add _gather_system_snapshot() for real-time grounding (time, thought count, chat activity, task queue)
- Improve _build_continuity_context() with anti-repetition header and 100-char truncation
- Fix journal + memory timestamps to include local timezone
- 12 new TDD tests covering all improvements

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 21:43:10 -04:00
parent 22589375e1
commit f1e909b1e3
2 changed files with 276 additions and 22 deletions
--- a/tests/timmy/test_thinking.py
+++ b/tests/timmy/test_thinking.py
@@ -572,6 +572,166 @@ async def test_think_once_custom_prompt_stored_in_journal(tmp_path):
    assert stored.seed_type == "prompted"


+# ---------------------------------------------------------------------------
+# Enriched thinking: anti-loop, anti-confabulation, grounding, timestamps
+# ---------------------------------------------------------------------------
+
+
+def test_thinking_prompt_anti_confabulation():
+    """_THINKING_PROMPT must contain at least one anti-confabulation phrase."""
+    from timmy.thinking import _THINKING_PROMPT
+
+    prompt_lower = _THINKING_PROMPT.lower()  # lowercase once so the match is case-insensitive
+    assert any(
+        phrase in prompt_lower
+        for phrase in ["actually happened", "never invent", "do not invent", "do not fabricate"]
+    )
+
+
+def test_thinking_prompt_brevity_limit():
+    """_THINKING_PROMPT must contain the literal phrase '2-3 sentences'."""
+    from timmy.thinking import _THINKING_PROMPT
+
+    assert "2-3 sentences" in _THINKING_PROMPT
+
+
+def test_thinking_prompt_has_system_context_slot():
+    """The raw _THINKING_PROMPT template must contain a {system_context} placeholder."""
+    from timmy.thinking import _THINKING_PROMPT
+
+    assert "{system_context}" in _THINKING_PROMPT
+
+
+def test_pick_seed_type_avoids_recent(tmp_path):
+    """_pick_seed_type should never pick a type used in the last 3 thoughts."""
+    engine = _make_engine(tmp_path)
+
+    # Store 3 thoughts, all with the "existential" seed type, so it fills the recent window
+    for _ in range(3):
+        t = engine._store_thought("Some existential musing.", "existential")
+        engine._last_thought_id = t.id
+
+    # Draw 20 times (selection is presumably random — confirm); none may be "existential"
+    for _ in range(20):
+        seed_type = engine._pick_seed_type()
+        assert seed_type != "existential"
+
+
+def test_pick_seed_type_resets_when_all_exhausted(tmp_path):
+    """If all seed types were recently used, _pick_seed_type still works."""
+    from timmy.thinking import SEED_TYPES
+
+    engine = _make_engine(tmp_path)
+
+    # Store one thought for every seed type, in SEED_TYPES order
+    for st in SEED_TYPES:
+        t = engine._store_thought(f"A {st} thought.", st)
+        engine._last_thought_id = t.id
+
+    # Must still return a valid type. NOTE(review): fallback needs len(SEED_TYPES) <= 3 — confirm
+    result = engine._pick_seed_type()
+    assert result in SEED_TYPES
+
+
+def test_continuity_warns_against_repetition(tmp_path):
+    """Continuity context should warn against repetition ('do not repeat' or 'new' substring)."""
+    engine = _make_engine(tmp_path)
+    engine._store_thought("Sovereignty is important.", "sovereignty")
+    engine._store_thought("Privacy matters deeply.", "sovereignty")
+
+    context = engine._build_continuity_context()
+    context_lower = context.lower()  # case-insensitive match below
+    assert "do not repeat" in context_lower or "new" in context_lower
+
+
+def test_continuity_truncates_thought_content(tmp_path):
+    """Continuity context should truncate long thoughts to keep token budget lean."""
+    engine = _make_engine(tmp_path)
+    long_content = "A" * 300  # a single 300-character thought
+    engine._store_thought(long_content, "freeform")
+
+    context = engine._build_continuity_context()
+    # The full 300-char string must not appear verbatim in the context
+    assert long_content not in context
+    assert "..." in context  # an ellipsis is expected as the truncation marker
+
+
+@pytest.mark.asyncio
+async def test_journal_entry_has_timezone(tmp_path):
+    """Journal entries should include a timezone indicator in the timestamp."""
+    engine = _make_engine(tmp_path)
+
+    with (
+        patch.object(engine, "_call_agent", return_value="A grounded thought."),  # canned thought
+        patch.object(engine, "_log_event"),
+        patch.object(engine, "_update_memory"),
+        patch.object(engine, "_broadcast", new_callable=AsyncMock),
+    ):
+        await engine.think_once()
+
+    journal_dir = tmp_path / "journal"
+    files = list(journal_dir.glob("*.md"))
+    assert len(files) == 1  # one think_once() call should yield exactly one journal file
+    content = files[0].read_text()
+
+    # Expected header format: "## TIME TZ — seed_type"
+    # The part before "—" should split into 3+ tokens: time, AM/PM, timezone
+    header_line = content.split("\n")[0]
+    time_part = header_line.split("—")[0].replace("##", "").strip()
+    parts = time_part.split()
+    assert len(parts) >= 3, f"Expected timezone in timestamp, got: '{time_part}'"
+
+
+def test_system_snapshot_graceful(tmp_path):
+    """_gather_system_snapshot should not crash when services are unavailable."""
+    engine = _make_engine(tmp_path)
+    snapshot = engine._gather_system_snapshot()  # NOTE(review): nothing is disabled here — confirm
+    assert isinstance(snapshot, str)
+
+
+def test_system_snapshot_includes_time(tmp_path):
+    """_gather_system_snapshot should always include the current time."""
+    engine = _make_engine(tmp_path)
+    snapshot = engine._gather_system_snapshot()
+    # Should contain AM or PM (12-hour format); this is a substring check, not a time parse
+    assert "AM" in snapshot or "PM" in snapshot
+
+
+def test_system_snapshot_includes_thought_count(tmp_path):
+    """_gather_system_snapshot should report how many thoughts today."""
+    engine = _make_engine(tmp_path)
+    engine._store_thought("First thought.", "freeform")
+    engine._store_thought("Second thought.", "creative")
+
+    snapshot = engine._gather_system_snapshot()
+    assert "2" in snapshot  # 2 thoughts today — NOTE(review): a "2" in the clock time also passes
+
+
+@pytest.mark.asyncio
+async def test_prompt_includes_system_context(tmp_path):
+    """The prompt sent to _call_agent should include system snapshot data."""
+    engine = _make_engine(tmp_path)
+
+    captured_prompts = []
+
+    def capture_agent(prompt):  # stand-in for _call_agent that records the prompt it receives
+        captured_prompts.append(prompt)
+        return "A thought grounded in reality."
+
+    with (
+        patch.object(engine, "_call_agent", side_effect=capture_agent),
+        patch.object(engine, "_log_event"),
+        patch.object(engine, "_update_memory"),
+        patch.object(engine, "_broadcast", new_callable=AsyncMock),
+    ):
+        await engine.think_once()
+
+    assert len(captured_prompts) == 1  # think_once() should call the agent exactly once
+    prompt = captured_prompts[0]
+    # Should contain the time (from system snapshot); substring check for the AM/PM marker
+    assert "AM" in prompt or "PM" in prompt
+
+
 # ---------------------------------------------------------------------------
 # Dashboard route
 # ---------------------------------------------------------------------------