forked from Rockachopa/Timmy-time-dashboard
feat: enrich thinking engine — anti-loop, anti-confabulation, grounding
Rewrite _THINKING_PROMPT with strict rules: 2-3 sentence limit, anti-confabulation (only reference real data), anti-repetition. - Add _pick_seed_type() with recent-type dedup (excludes last 3) - Add _gather_system_snapshot() for real-time grounding (time, thought count, chat activity, task queue) - Improve _build_continuity_context() with anti-repetition header and 100-char truncation - Fix journal + memory timestamps to include local timezone - 12 new TDD tests covering all improvements Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -572,6 +572,166 @@ async def test_think_once_custom_prompt_stored_in_journal(tmp_path):
|
||||
assert stored.seed_type == "prompted"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Enriched thinking: anti-loop, anti-confabulation, grounding, timestamps
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_thinking_prompt_anti_confabulation():
|
||||
"""_THINKING_PROMPT must instruct the model not to invent events."""
|
||||
from timmy.thinking import _THINKING_PROMPT
|
||||
|
||||
prompt_lower = _THINKING_PROMPT.lower()
|
||||
assert any(
|
||||
phrase in prompt_lower
|
||||
for phrase in ["actually happened", "never invent", "do not invent", "do not fabricate"]
|
||||
)
|
||||
|
||||
|
||||
def test_thinking_prompt_brevity_limit():
|
||||
"""_THINKING_PROMPT must enforce a 2-3 sentence limit."""
|
||||
from timmy.thinking import _THINKING_PROMPT
|
||||
|
||||
assert "2-3 sentences" in _THINKING_PROMPT
|
||||
|
||||
|
||||
def test_thinking_prompt_has_system_context_slot():
|
||||
"""_THINKING_PROMPT must include a {system_context} placeholder."""
|
||||
from timmy.thinking import _THINKING_PROMPT
|
||||
|
||||
assert "{system_context}" in _THINKING_PROMPT
|
||||
|
||||
|
||||
def test_pick_seed_type_avoids_recent(tmp_path):
|
||||
"""_pick_seed_type should never pick a type used in the last 3 thoughts."""
|
||||
engine = _make_engine(tmp_path)
|
||||
|
||||
# Store 3 thoughts all with "existential" seed type
|
||||
for _ in range(3):
|
||||
t = engine._store_thought("Some existential musing.", "existential")
|
||||
engine._last_thought_id = t.id
|
||||
|
||||
# Pick 20 seeds — none should be "existential"
|
||||
for _ in range(20):
|
||||
seed_type = engine._pick_seed_type()
|
||||
assert seed_type != "existential"
|
||||
|
||||
|
||||
def test_pick_seed_type_resets_when_all_exhausted(tmp_path):
|
||||
"""If all seed types were recently used, _pick_seed_type still works."""
|
||||
from timmy.thinking import SEED_TYPES
|
||||
|
||||
engine = _make_engine(tmp_path)
|
||||
|
||||
# Store one thought for every seed type
|
||||
for st in SEED_TYPES:
|
||||
t = engine._store_thought(f"A {st} thought.", st)
|
||||
engine._last_thought_id = t.id
|
||||
|
||||
# Should still return a valid type (falls back to full pool)
|
||||
result = engine._pick_seed_type()
|
||||
assert result in SEED_TYPES
|
||||
|
||||
|
||||
def test_continuity_warns_against_repetition(tmp_path):
|
||||
"""Continuity context should warn the model not to repeat recent themes."""
|
||||
engine = _make_engine(tmp_path)
|
||||
engine._store_thought("Sovereignty is important.", "sovereignty")
|
||||
engine._store_thought("Privacy matters deeply.", "sovereignty")
|
||||
|
||||
context = engine._build_continuity_context()
|
||||
context_lower = context.lower()
|
||||
assert "do not repeat" in context_lower or "new" in context_lower
|
||||
|
||||
|
||||
def test_continuity_truncates_thought_content(tmp_path):
|
||||
"""Continuity context should truncate long thoughts to keep token budget lean."""
|
||||
engine = _make_engine(tmp_path)
|
||||
long_content = "A" * 300
|
||||
engine._store_thought(long_content, "freeform")
|
||||
|
||||
context = engine._build_continuity_context()
|
||||
# Should not contain the full 300-char string
|
||||
assert long_content not in context
|
||||
assert "..." in context
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_journal_entry_has_timezone(tmp_path):
|
||||
"""Journal entries should include a timezone indicator in the timestamp."""
|
||||
engine = _make_engine(tmp_path)
|
||||
|
||||
with (
|
||||
patch.object(engine, "_call_agent", return_value="A grounded thought."),
|
||||
patch.object(engine, "_log_event"),
|
||||
patch.object(engine, "_update_memory"),
|
||||
patch.object(engine, "_broadcast", new_callable=AsyncMock),
|
||||
):
|
||||
await engine.think_once()
|
||||
|
||||
journal_dir = tmp_path / "journal"
|
||||
files = list(journal_dir.glob("*.md"))
|
||||
assert len(files) == 1
|
||||
content = files[0].read_text()
|
||||
|
||||
# Header format: "## TIME TZ — seed_type"
|
||||
# The part before "—" should have 3+ tokens: time, AM/PM, timezone
|
||||
header_line = content.split("\n")[0]
|
||||
time_part = header_line.split("—")[0].replace("##", "").strip()
|
||||
parts = time_part.split()
|
||||
assert len(parts) >= 3, f"Expected timezone in timestamp, got: '{time_part}'"
|
||||
|
||||
|
||||
def test_system_snapshot_graceful(tmp_path):
|
||||
"""_gather_system_snapshot should not crash when services are unavailable."""
|
||||
engine = _make_engine(tmp_path)
|
||||
snapshot = engine._gather_system_snapshot()
|
||||
assert isinstance(snapshot, str)
|
||||
|
||||
|
||||
def test_system_snapshot_includes_time(tmp_path):
|
||||
"""_gather_system_snapshot should always include the current time."""
|
||||
engine = _make_engine(tmp_path)
|
||||
snapshot = engine._gather_system_snapshot()
|
||||
# Should contain AM or PM (12-hour format)
|
||||
assert "AM" in snapshot or "PM" in snapshot
|
||||
|
||||
|
||||
def test_system_snapshot_includes_thought_count(tmp_path):
|
||||
"""_gather_system_snapshot should report how many thoughts today."""
|
||||
engine = _make_engine(tmp_path)
|
||||
engine._store_thought("First thought.", "freeform")
|
||||
engine._store_thought("Second thought.", "creative")
|
||||
|
||||
snapshot = engine._gather_system_snapshot()
|
||||
assert "2" in snapshot # 2 thoughts today
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_prompt_includes_system_context(tmp_path):
|
||||
"""The prompt sent to _call_agent should include system snapshot data."""
|
||||
engine = _make_engine(tmp_path)
|
||||
|
||||
captured_prompts = []
|
||||
|
||||
def capture_agent(prompt):
|
||||
captured_prompts.append(prompt)
|
||||
return "A thought grounded in reality."
|
||||
|
||||
with (
|
||||
patch.object(engine, "_call_agent", side_effect=capture_agent),
|
||||
patch.object(engine, "_log_event"),
|
||||
patch.object(engine, "_update_memory"),
|
||||
patch.object(engine, "_broadcast", new_callable=AsyncMock),
|
||||
):
|
||||
await engine.think_once()
|
||||
|
||||
assert len(captured_prompts) == 1
|
||||
prompt = captured_prompts[0]
|
||||
# Should contain time (from system snapshot)
|
||||
assert "AM" in prompt or "PM" in prompt
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dashboard route
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user