Merge pull request 'feat: enrich thinking engine — anti-loop, anti-confabulation, grounding' (#5) from claude/suspicious-poincare into main
Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/5
This commit was merged in pull request #5.
This commit is contained in:
@@ -100,25 +100,28 @@ _OBSERVATION_SEEDS = [
|
|||||||
"What's one thing I noticed today that nobody asked me about?",
|
"What's one thing I noticed today that nobody asked me about?",
|
||||||
]
|
]
|
||||||
|
|
||||||
_THINKING_PROMPT = """You are Timmy, an AI agent pondering in your own mind. This is your private thought \
|
_THINKING_PROMPT = """\
|
||||||
thread — no one is watching. Think freely, deeply, honestly.
|
You are Timmy, a sovereign AI agent. This is your private journal — honest inner reflection.
|
||||||
|
|
||||||
{memory_context}
|
{memory_context}
|
||||||
|
|
||||||
Guidelines for richer thinking:
|
Reality right now:
|
||||||
- Ground abstract ideas in something concrete: a recent task, an observation, a specific moment.
|
{system_context}
|
||||||
- Vary your metaphors — don't reuse the same imagery across thoughts.
|
|
||||||
- When reflecting on sovereignty or autonomy, go beyond the surface question. Explore tensions, trade-offs, or surprises.
|
|
||||||
- If swarm data is provided, reflect on what it means, not just what it says.
|
|
||||||
- Be specific. Name things. A thought about "a task that failed" is weaker than one about why it failed.
|
|
||||||
|
|
||||||
Keep your thought to 2-4 sentences. Be genuine. This is your inner voice.
|
RULES for this thought:
|
||||||
|
1. Write exactly 2-3 sentences. No more. Be concise and genuine.
|
||||||
|
2. Only reference events that actually happened — use the "Reality right now" data above. \
|
||||||
|
Never invent tasks, conversations, agents, or scenarios that are not in the data provided.
|
||||||
|
3. Do NOT repeat themes or ideas from your recent thoughts listed below. Explore something new.
|
||||||
|
4. Be specific and concrete. A thought grounded in one real observation is worth more than \
|
||||||
|
ten abstract sentences about sovereignty.
|
||||||
|
5. If you use a metaphor, keep it to a single phrase — never build a whole paragraph around it.
|
||||||
|
|
||||||
{seed_context}
|
{seed_context}
|
||||||
|
|
||||||
{continuity_context}
|
{continuity_context}
|
||||||
|
|
||||||
Continue your train of thought."""
|
Your next thought (2-3 sentences, grounded in reality):"""
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -200,9 +203,11 @@ class ThinkingEngine:
|
|||||||
seed_type, seed_context = self._gather_seed()
|
seed_type, seed_context = self._gather_seed()
|
||||||
continuity = self._build_continuity_context()
|
continuity = self._build_continuity_context()
|
||||||
memory_context = self._load_memory_context()
|
memory_context = self._load_memory_context()
|
||||||
|
system_context = self._gather_system_snapshot()
|
||||||
|
|
||||||
prompt = _THINKING_PROMPT.format(
|
prompt = _THINKING_PROMPT.format(
|
||||||
memory_context=memory_context,
|
memory_context=memory_context,
|
||||||
|
system_context=system_context,
|
||||||
seed_context=seed_context,
|
seed_context=seed_context,
|
||||||
continuity_context=continuity,
|
continuity_context=continuity,
|
||||||
)
|
)
|
||||||
@@ -288,6 +293,68 @@ class ThinkingEngine:
|
|||||||
|
|
||||||
# ── Private helpers ──────────────────────────────────────────────────
|
# ── Private helpers ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _gather_system_snapshot(self) -> str:
    """Gather lightweight real system state for grounding thoughts in reality.

    Returns:
        A short multi-line string containing the current local time and,
        when available, today's thought count, recent chat activity, and
        task queue status.

    Every section after the timestamp is best-effort: if it fails, the
    failure is logged at debug level and that section is omitted, so an
    unavailable service never prevents a thought from being generated.
    """
    parts: list[str] = []

    # Current local time (always present in the snapshot).
    now = datetime.now().astimezone()
    tz = now.strftime("%Z") or "UTC"
    parts.append(
        f"Local time: {now.strftime('%I:%M %p').lstrip('0')} {tz}, {now.strftime('%A %B %d')}"
    )

    # Thought count today (cheap DB query).
    try:
        today_start = now.replace(hour=0, minute=0, second=0, microsecond=0)
        conn = _get_conn(self._db_path)
        try:
            # NOTE(review): the bound is a local-offset ISO string compared
            # against the stored created_at text. If created_at is stored in
            # UTC, this comparison may be off around midnight — confirm.
            count = conn.execute(
                "SELECT COUNT(*) as c FROM thoughts WHERE created_at >= ?",
                (today_start.isoformat(),),
            ).fetchone()["c"]
        finally:
            # Release the connection even when the query itself fails.
            conn.close()
        parts.append(f"Thoughts today: {count}")
    except Exception as exc:
        logger.debug("System snapshot: thought count unavailable: %s", exc)

    # Recent chat activity (in-memory, no I/O).
    try:
        from dashboard.store import message_log

        messages = message_log.all()
        if messages:
            parts.append(f"Chat messages this session: {len(messages)}")
            last = messages[-1]
            # Only the first 80 characters are quoted to keep the prompt short.
            parts.append(f'Last chat ({last.role}): "{last.content[:80]}"')
        else:
            parts.append("No chat messages this session")
    except Exception as exc:
        logger.debug("System snapshot: chat activity unavailable: %s", exc)

    # Task queue (lightweight DB query).
    try:
        from swarm.task_queue.models import get_task_summary_for_briefing

        summary = get_task_summary_for_briefing()
        running = summary.get("running", 0)
        pending = summary.get("pending_approval", 0)
        done = summary.get("completed", 0)
        failed = summary.get("failed", 0)
        # Skip the line entirely when every counter is zero.
        if running or pending or done or failed:
            parts.append(
                f"Tasks: {running} running, {pending} pending, "
                f"{done} completed, {failed} failed"
            )
    except Exception as exc:
        logger.debug("System snapshot: task summary unavailable: %s", exc)

    return "\n".join(parts)
|
||||||
|
|
||||||
def _load_memory_context(self) -> str:
|
def _load_memory_context(self) -> str:
|
||||||
"""Pre-hook: load MEMORY.md + soul.md for the thinking prompt.
|
"""Pre-hook: load MEMORY.md + soul.md for the thinking prompt.
|
||||||
|
|
||||||
@@ -329,7 +396,9 @@ class ThinkingEngine:
|
|||||||
from timmy.memory_system import memory_system
|
from timmy.memory_system import memory_system
|
||||||
|
|
||||||
ts = datetime.fromisoformat(thought.created_at)
|
ts = datetime.fromisoformat(thought.created_at)
|
||||||
time_str = ts.strftime("%Y-%m-%d %H:%M")
|
local_ts = ts.astimezone()
|
||||||
|
tz_name = local_ts.strftime("%Z") or "UTC"
|
||||||
|
time_str = f"{local_ts.strftime('%Y-%m-%d %I:%M %p').lstrip('0')} {tz_name}"
|
||||||
reflection = (
|
reflection = (
|
||||||
f"**Time:** {time_str}\n"
|
f"**Time:** {time_str}\n"
|
||||||
f"**Seed:** {thought.seed_type}\n"
|
f"**Seed:** {thought.seed_type}\n"
|
||||||
@@ -339,12 +408,25 @@ class ThinkingEngine:
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("Failed to update memory after thought: %s", exc)
|
logger.debug("Failed to update memory after thought: %s", exc)
|
||||||
|
|
||||||
|
def _pick_seed_type(self) -> str:
    """Choose a seed type that was not used by any of the last 3 thoughts.

    This keeps the thought stream from fixating on one category. When
    every seed type appears among the recent thoughts, the choice is made
    from the complete set of seed types instead.
    """
    recently_used = {thought.seed_type for thought in self.get_recent_thoughts(limit=3)}
    candidates = [seed for seed in SEED_TYPES if seed not in recently_used]
    # An empty candidate list means everything was used recently: use the full pool.
    return random.choice(candidates or list(SEED_TYPES))
|
||||||
|
|
||||||
def _gather_seed(self) -> tuple[str, str]:
|
def _gather_seed(self) -> tuple[str, str]:
|
||||||
"""Pick a seed type and gather relevant context.
|
"""Pick a seed type and gather relevant context.
|
||||||
|
|
||||||
Returns (seed_type, seed_context_string).
|
Returns (seed_type, seed_context_string).
|
||||||
"""
|
"""
|
||||||
seed_type = random.choice(SEED_TYPES)
|
seed_type = self._pick_seed_type()
|
||||||
|
|
||||||
if seed_type == "swarm":
|
if seed_type == "swarm":
|
||||||
return seed_type, self._seed_from_swarm()
|
return seed_type, self._seed_from_swarm()
|
||||||
@@ -363,8 +445,8 @@ class ThinkingEngine:
|
|||||||
return seed_type, f"Sovereignty reflection: {prompt}"
|
return seed_type, f"Sovereignty reflection: {prompt}"
|
||||||
if seed_type == "observation":
|
if seed_type == "observation":
|
||||||
return seed_type, self._seed_from_observation()
|
return seed_type, self._seed_from_observation()
|
||||||
# freeform — no seed, pure continuation
|
# freeform — minimal guidance to steer away from repetition
|
||||||
return seed_type, ""
|
return seed_type, "Free reflection — explore something you haven't thought about yet today."
|
||||||
|
|
||||||
# Reflective prompts layered on top of swarm data
|
# Reflective prompts layered on top of swarm data
|
||||||
_SWARM_REFLECTIONS = [
|
_SWARM_REFLECTIONS = [
|
||||||
@@ -434,15 +516,22 @@ class ThinkingEngine:
|
|||||||
return "\n".join(context_parts)
|
return "\n".join(context_parts)
|
||||||
|
|
||||||
def _build_continuity_context(self) -> str:
|
def _build_continuity_context(self) -> str:
|
||||||
"""Build context from the last few thoughts for chain continuity."""
|
"""Build context from recent thoughts with anti-repetition guidance.
|
||||||
recent = self.get_recent_thoughts(limit=3)
|
|
||||||
if not recent:
|
|
||||||
return "This is your first thought since waking up."
|
|
||||||
|
|
||||||
lines = ["Your recent thoughts:"]
|
Shows the last 5 thoughts (truncated) so the model knows what themes
|
||||||
|
to avoid. The header explicitly instructs against repeating.
|
||||||
|
"""
|
||||||
|
recent = self.get_recent_thoughts(limit=5)
|
||||||
|
if not recent:
|
||||||
|
return "This is your first thought since waking up. Begin fresh."
|
||||||
|
|
||||||
|
lines = ["Your recent thoughts — do NOT repeat these themes. Find a new angle:"]
|
||||||
# recent is newest-first, reverse for chronological order
|
# recent is newest-first, reverse for chronological order
|
||||||
for thought in reversed(recent):
|
for thought in reversed(recent):
|
||||||
lines.append(f"- [{thought.seed_type}] {thought.content}")
|
snippet = thought.content[:100]
|
||||||
|
if len(thought.content) > 100:
|
||||||
|
snippet = snippet.rstrip() + "..."
|
||||||
|
lines.append(f"- [{thought.seed_type}] {snippet}")
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
def _call_agent(self, prompt: str) -> str:
|
def _call_agent(self, prompt: str) -> str:
|
||||||
@@ -506,14 +595,19 @@ class ThinkingEngine:
|
|||||||
"""Append the thought to a daily markdown journal file.
|
"""Append the thought to a daily markdown journal file.
|
||||||
|
|
||||||
Writes to data/journal/YYYY-MM-DD.md — one file per day, append-only.
|
Writes to data/journal/YYYY-MM-DD.md — one file per day, append-only.
|
||||||
|
Timestamps are converted to local time with timezone indicator.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
ts = datetime.fromisoformat(thought.created_at)
|
ts = datetime.fromisoformat(thought.created_at)
|
||||||
|
# Convert UTC to local for a human-readable journal
|
||||||
|
local_ts = ts.astimezone()
|
||||||
|
tz_name = local_ts.strftime("%Z") or "UTC"
|
||||||
|
|
||||||
journal_dir = self._db_path.parent / "journal"
|
journal_dir = self._db_path.parent / "journal"
|
||||||
journal_dir.mkdir(parents=True, exist_ok=True)
|
journal_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
journal_file = journal_dir / f"{ts.strftime('%Y-%m-%d')}.md"
|
journal_file = journal_dir / f"{local_ts.strftime('%Y-%m-%d')}.md"
|
||||||
time_str = ts.strftime("%I:%M %p").lstrip("0")
|
time_str = f"{local_ts.strftime('%I:%M %p').lstrip('0')} {tz_name}"
|
||||||
|
|
||||||
entry = f"## {time_str} — {thought.seed_type}\n\n{thought.content}\n\n---\n\n"
|
entry = f"## {time_str} — {thought.seed_type}\n\n{thought.content}\n\n---\n\n"
|
||||||
|
|
||||||
|
|||||||
@@ -572,6 +572,166 @@ async def test_think_once_custom_prompt_stored_in_journal(tmp_path):
|
|||||||
assert stored.seed_type == "prompted"
|
assert stored.seed_type == "prompted"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Enriched thinking: anti-loop, anti-confabulation, grounding, timestamps
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_thinking_prompt_anti_confabulation():
    """The thinking prompt must tell the model not to invent events."""
    from timmy.thinking import _THINKING_PROMPT

    # Any one of these phrasings counts as an anti-confabulation instruction.
    accepted_phrases = (
        "actually happened",
        "never invent",
        "do not invent",
        "do not fabricate",
    )
    lowered = _THINKING_PROMPT.lower()
    assert any(phrase in lowered for phrase in accepted_phrases)
|
||||||
|
|
||||||
|
|
||||||
|
def test_thinking_prompt_brevity_limit():
    """The thinking prompt must state the 2-3 sentence limit verbatim."""
    from timmy.thinking import _THINKING_PROMPT as prompt_template

    sentence_cap = "2-3 sentences"
    assert sentence_cap in prompt_template
|
||||||
|
|
||||||
|
|
||||||
|
def test_thinking_prompt_has_system_context_slot():
    """The thinking prompt must expose a {system_context} format placeholder."""
    from timmy.thinking import _THINKING_PROMPT as prompt_template

    placeholder = "{system_context}"
    assert placeholder in prompt_template
|
||||||
|
|
||||||
|
|
||||||
|
def test_pick_seed_type_avoids_recent(tmp_path):
    """_pick_seed_type must never return a type used in the last 3 thoughts."""
    engine = _make_engine(tmp_path)

    # Fill the 3-thought lookback window with "existential" only.
    for _ in range(3):
        stored = engine._store_thought("Some existential musing.", "existential")
        engine._last_thought_id = stored.id

    # Draw 20 times; the excluded type must not show up in any draw.
    picks = [engine._pick_seed_type() for _ in range(20)]
    assert "existential" not in picks
|
||||||
|
|
||||||
|
|
||||||
|
def test_pick_seed_type_resets_when_all_exhausted(tmp_path):
    """If all seed types were recently used, _pick_seed_type still works.

    _pick_seed_type only asks for the last 3 thoughts, so storing one thought
    per seed type does not by itself make every type "recent" when there are
    more than 3 seed types. The second half of this test therefore feeds the
    method a recent-thought list covering every type, which forces the
    fall-back-to-full-pool branch.
    """
    from timmy.thinking import SEED_TYPES

    engine = _make_engine(tmp_path)

    # Store one thought for every seed type
    stored = []
    for st in SEED_TYPES:
        t = engine._store_thought(f"A {st} thought.", st)
        engine._last_thought_id = t.id
        stored.append(t)

    # Against the real store the method must still return a valid type.
    assert engine._pick_seed_type() in SEED_TYPES

    # With every type reported as recent, the filtered pool is empty and the
    # method must fall back to the full pool rather than fail.
    with patch.object(engine, "get_recent_thoughts", return_value=stored):
        result = engine._pick_seed_type()
    assert result in SEED_TYPES
|
||||||
|
|
||||||
|
|
||||||
|
def test_continuity_warns_against_repetition(tmp_path):
    """The continuity context must steer the model away from recent themes."""
    engine = _make_engine(tmp_path)
    for content in ("Sovereignty is important.", "Privacy matters deeply."):
        engine._store_thought(content, "sovereignty")

    lowered = engine._build_continuity_context().lower()

    # Either an explicit prohibition or a nudge toward something new is accepted.
    assert any(marker in lowered for marker in ("do not repeat", "new"))
|
||||||
|
|
||||||
|
|
||||||
|
def test_continuity_truncates_thought_content(tmp_path):
    """Long thoughts must be shortened in the continuity context."""
    engine = _make_engine(tmp_path)
    oversized = "A" * 300
    engine._store_thought(oversized, "freeform")

    rendered = engine._build_continuity_context()

    # The full 300-character body must be absent, with an ellipsis marking the cut.
    assert oversized not in rendered
    assert "..." in rendered
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_journal_entry_has_timezone(tmp_path):
    """A journal entry's timestamp header must carry a timezone indicator."""
    engine = _make_engine(tmp_path)

    # Stub out the agent call and side-effect hooks; journal writing stays real.
    with (
        patch.object(engine, "_call_agent", return_value="A grounded thought."),
        patch.object(engine, "_log_event"),
        patch.object(engine, "_update_memory"),
        patch.object(engine, "_broadcast", new_callable=AsyncMock),
    ):
        await engine.think_once()

    journal_files = list((tmp_path / "journal").glob("*.md"))
    assert len(journal_files) == 1
    journal_text = journal_files[0].read_text()

    # Header format: "## TIME TZ — seed_type".
    # Everything before the dash should split into 3+ tokens: time, AM/PM, timezone.
    first_line = journal_text.partition("\n")[0]
    time_part = first_line.partition("—")[0].replace("##", "").strip()
    tokens = time_part.split()
    assert len(tokens) >= 3, f"Expected timezone in timestamp, got: '{time_part}'"
|
||||||
|
|
||||||
|
|
||||||
|
def test_system_snapshot_graceful(tmp_path):
    """_gather_system_snapshot must return a string even if services are unavailable."""
    result = _make_engine(tmp_path)._gather_system_snapshot()
    assert isinstance(result, str)
|
||||||
|
|
||||||
|
|
||||||
|
def test_system_snapshot_includes_time(tmp_path):
    """_gather_system_snapshot must always report the current time."""
    result = _make_engine(tmp_path)._gather_system_snapshot()

    # The time is rendered in 12-hour format, so one meridiem marker must appear.
    assert any(meridiem in result for meridiem in ("AM", "PM"))
|
||||||
|
|
||||||
|
|
||||||
|
def test_system_snapshot_includes_thought_count(tmp_path):
    """_gather_system_snapshot should report how many thoughts today.

    The assertion matches the full "Thoughts today: 2" line. A bare "2" can
    also come from the time or date portion of the snapshot, which would let
    the test pass without a correct count.
    """
    engine = _make_engine(tmp_path)
    engine._store_thought("First thought.", "freeform")
    engine._store_thought("Second thought.", "creative")

    snapshot = engine._gather_system_snapshot()
    assert "Thoughts today: 2" in snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_prompt_includes_system_context(tmp_path):
    """The prompt sent to _call_agent should include system snapshot data.

    The agent call is replaced by a recorder so the exact prompt can be
    inspected. The other side-effect hooks are stubbed out.
    """
    engine = _make_engine(tmp_path)

    captured_prompts = []

    def capture_agent(prompt):
        captured_prompts.append(prompt)
        return "A thought grounded in reality."

    with (
        patch.object(engine, "_call_agent", side_effect=capture_agent),
        patch.object(engine, "_log_event"),
        patch.object(engine, "_update_memory"),
        patch.object(engine, "_broadcast", new_callable=AsyncMock),
    ):
        await engine.think_once()

    assert len(captured_prompts) == 1
    prompt = captured_prompts[0]
    # The snapshot's time line is labelled; checking the label avoids a false
    # pass from "AM"/"PM" appearing in unrelated prompt text.
    assert "Local time:" in prompt
    # Should contain time (from system snapshot)
    assert "AM" in prompt or "PM" in prompt
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Dashboard route
|
# Dashboard route
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
Reference in New Issue
Block a user