1
0

feat: enrich thinking engine — anti-loop, anti-confabulation, grounding

Rewrite _THINKING_PROMPT with strict rules: 2-3 sentence limit,
anti-confabulation (only reference real data), anti-repetition.

- Add _pick_seed_type() with recent-type dedup (excludes last 3)
- Add _gather_system_snapshot() for real-time grounding (time, thought
  count, chat activity, task queue)
- Improve _build_continuity_context() with anti-repetition header and
  100-char truncation
- Fix journal + memory timestamps to include local timezone
- 12 new TDD tests covering all improvements

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Trip T
2026-03-11 21:43:10 -04:00
parent 22589375e1
commit f1e909b1e3
2 changed files with 276 additions and 22 deletions

View File

@@ -572,6 +572,166 @@ async def test_think_once_custom_prompt_stored_in_journal(tmp_path):
assert stored.seed_type == "prompted"
# ---------------------------------------------------------------------------
# Enriched thinking: anti-loop, anti-confabulation, grounding, timestamps
# ---------------------------------------------------------------------------
def test_thinking_prompt_anti_confabulation():
    """Check that _THINKING_PROMPT carries an anti-confabulation instruction.

    The test passes when the lower-cased prompt contains at least one of
    the accepted phrasings listed below.
    """
    from timmy.thinking import _THINKING_PROMPT

    accepted_phrasings = (
        "actually happened",
        "never invent",
        "do not invent",
        "do not fabricate",
    )
    lowered_prompt = _THINKING_PROMPT.lower()

    # One matching phrasing is enough; the exact wording is not pinned.
    found = False
    for wording in accepted_phrasings:
        if wording in lowered_prompt:
            found = True
            break
    assert found
def test_thinking_prompt_brevity_limit():
    """Check that _THINKING_PROMPT spells out the "2-3 sentences" limit."""
    from timmy.thinking import _THINKING_PROMPT as prompt_template

    # Exact, case-sensitive substring match against the raw template.
    required_wording = "2-3 sentences"
    assert required_wording in prompt_template
def test_thinking_prompt_has_system_context_slot():
    """Check that _THINKING_PROMPT exposes a {system_context} placeholder."""
    from timmy.thinking import _THINKING_PROMPT as prompt_template

    # The literal braces must be present in the unformatted template.
    placeholder = "{system_context}"
    assert placeholder in prompt_template
def test_pick_seed_type_avoids_recent(tmp_path):
    """Check that a seed type used by the last 3 thoughts is not picked again.

    Three "existential" thoughts are stored back to back, then
    _pick_seed_type() is sampled up to 20 times; no sample may be
    "existential".
    """
    recent_type = "existential"
    engine = _make_engine(tmp_path)

    # Fill the recent history with a single seed type.
    for _ in range(3):
        stored = engine._store_thought("Some existential musing.", recent_type)
        engine._last_thought_id = stored.id

    # all() over a generator stops at the first offending pick, so sampling
    # ends as soon as the excluded type shows up.
    assert all(engine._pick_seed_type() != recent_type for _ in range(20))
def test_pick_seed_type_resets_when_all_exhausted(tmp_path):
    """Check that _pick_seed_type still yields a valid type after every type was used.

    One thought is stored per entry in SEED_TYPES, after which a single
    pick must be a member of SEED_TYPES.
    """
    from timmy.thinking import SEED_TYPES

    engine = _make_engine(tmp_path)

    # Walk the whole pool so that each seed type appears in the history.
    for seed in SEED_TYPES:
        thought = engine._store_thought(f"A {seed} thought.", seed)
        engine._last_thought_id = thought.id

    # Whatever dedup is applied, the pick must come from the known pool.
    assert engine._pick_seed_type() in SEED_TYPES
def test_continuity_warns_against_repetition(tmp_path):
    """Check that the continuity context nudges the model away from repeating itself.

    After two "sovereignty" thoughts are stored, the lower-cased context
    must contain either "do not repeat" or "new".
    """
    engine = _make_engine(tmp_path)
    for text in ("Sovereignty is important.", "Privacy matters deeply."):
        engine._store_thought(text, "sovereignty")

    lowered_context = engine._build_continuity_context().lower()

    # Either marker is accepted as evidence of the anti-repetition header.
    markers = ("do not repeat", "new")
    assert any(marker in lowered_context for marker in markers)
def test_continuity_truncates_thought_content(tmp_path):
    """Check that an over-long thought is shortened in the continuity context.

    A 300-character thought is stored; the context must not contain it
    verbatim and must contain a "..." marker.
    """
    oversized = "A" * 300

    engine = _make_engine(tmp_path)
    engine._store_thought(oversized, "freeform")
    rendered = engine._build_continuity_context()

    # The full 300-char run must be gone, replaced by a shortened form.
    assert oversized not in rendered
    assert "..." in rendered
@pytest.mark.asyncio
async def test_journal_entry_has_timezone(tmp_path):
    """Journal entries should include a timezone indicator in the timestamp.

    Runs one think_once() cycle with the agent call, event logging, memory
    update and broadcast patched out, then inspects the first line of the
    single markdown file written under ``tmp_path / "journal"``.
    """
    engine = _make_engine(tmp_path)

    with (
        patch.object(engine, "_call_agent", return_value="A grounded thought."),
        patch.object(engine, "_log_event"),
        patch.object(engine, "_update_memory"),
        patch.object(engine, "_broadcast", new_callable=AsyncMock),
    ):
        await engine.think_once()

    journal_dir = tmp_path / "journal"
    files = list(journal_dir.glob("*.md"))
    assert len(files) == 1

    content = files[0].read_text()
    # Header format: "## TIME TZ — seed_type"
    # The part before "—" should have 3+ tokens: time, AM/PM, timezone
    header_line = content.split("\n")[0]
    # Split on the em dash that separates the timestamp from the seed type.
    # An empty separator here would raise ValueError("empty separator")
    # before the assertion below could ever run.
    time_part = header_line.split("—")[0].replace("##", "").strip()
    parts = time_part.split()
    assert len(parts) >= 3, f"Expected timezone in timestamp, got: '{time_part}'"
def test_system_snapshot_graceful(tmp_path):
    """Check that _gather_system_snapshot returns a string without raising.

    The engine is built fresh from the test factory with nothing else set
    up, so any failure inside the snapshot would surface as an exception.
    """
    snapshot_text = _make_engine(tmp_path)._gather_system_snapshot()
    assert isinstance(snapshot_text, str)
def test_system_snapshot_includes_time(tmp_path):
    """Check that the system snapshot carries a 12-hour clock marker."""
    snapshot_text = _make_engine(tmp_path)._gather_system_snapshot()

    # A 12-hour timestamp always ends in one of these two markers.
    meridiem_markers = ("AM", "PM")
    assert any(marker in snapshot_text for marker in meridiem_markers)
def test_system_snapshot_includes_thought_count(tmp_path):
    """Check that the snapshot mentions the number of thoughts stored so far.

    Two thoughts are stored before the snapshot is taken, and the digit
    "2" must appear somewhere in the snapshot text.
    """
    engine = _make_engine(tmp_path)
    seeded_thoughts = (
        ("First thought.", "freeform"),
        ("Second thought.", "creative"),
    )
    for text, seed in seeded_thoughts:
        engine._store_thought(text, seed)

    snapshot_text = engine._gather_system_snapshot()

    # NOTE(review): a bare "2" could also match a digit in the time or
    # date portion of the snapshot — confirm the snapshot format before
    # relying on this as proof of the thought count.
    assert "2" in snapshot_text  # 2 thoughts today
@pytest.mark.asyncio
async def test_prompt_includes_system_context(tmp_path):
    """Check that the prompt handed to _call_agent contains a clock marker.

    _call_agent is replaced by a recording stub; after one think_once()
    cycle exactly one prompt must have been recorded and it must contain
    "AM" or "PM".
    """
    engine = _make_engine(tmp_path)
    recorded = []

    def record_prompt(prompt):
        # Keep what the engine sends and answer with a canned thought.
        recorded.append(prompt)
        return "A thought grounded in reality."

    with (
        patch.object(engine, "_call_agent", side_effect=record_prompt),
        patch.object(engine, "_log_event"),
        patch.object(engine, "_update_memory"),
        patch.object(engine, "_broadcast", new_callable=AsyncMock),
    ):
        await engine.think_once()

    assert len(recorded) == 1
    sent_prompt = recorded[0]

    # The AM/PM marker is taken as evidence that time data reached the prompt.
    assert any(marker in sent_prompt for marker in ("AM", "PM"))
# ---------------------------------------------------------------------------
# Dashboard route
# ---------------------------------------------------------------------------