fix: add commitment tracking to prevent topic drift in Workshop sessions

Tracks action commitments Timmy makes during Workshop conversations (e.g. "I'll draft the ticket"). After 5 messages without follow-up, open commitments are surfaced as grounding context to the LLM. Commitments can be closed programmatically via close_commitment(). Fixes #322 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 02:42:00 -04:00
parent 3571d528ad
commit 99ab39c00c
2 changed files with 261 additions and 1 deletions
--- a/src/dashboard/routes/world.py
+++ b/src/dashboard/routes/world.py
@@ -17,6 +17,7 @@ or missing.
 import asyncio
 import json
 import logging
+import re
 import time
 from collections import deque
 from datetime import UTC, datetime
@@ -45,6 +46,83 @@ _conversation: deque[dict] = deque(maxlen=_MAX_EXCHANGES)

 _WORKSHOP_SESSION_ID = "workshop"

+# ---------------------------------------------------------------------------
+# Conversation grounding — commitment tracking (issue #322)
+# ---------------------------------------------------------------------------
+
+# Patterns that indicate Timmy is committing to an action.
+_COMMITMENT_PATTERNS: list[re.Pattern[str]] = [
+    re.compile(r"I'll (.+?)(?:\.|!|\?|$)", re.IGNORECASE),
+    re.compile(r"I will (.+?)(?:\.|!|\?|$)", re.IGNORECASE),
+    re.compile(r"[Ll]et me (.+?)(?:\.|!|\?|$)", re.IGNORECASE),
+]
+
+# After this many messages without follow-up, surface open commitments.
+_REMIND_AFTER = 5
+_MAX_COMMITMENTS = 10
+
+# In-memory list of open commitments.
+# Each entry: {"text": str, "created_at": float, "messages_since": int}
+_commitments: list[dict] = []
+
+
+def _extract_commitments(text: str) -> list[str]:
+    """Pull commitment phrases from Timmy's reply text."""
+    found: list[str] = []
+    for pattern in _COMMITMENT_PATTERNS:
+        for match in pattern.finditer(text):
+            phrase = match.group(1).strip()
+            if len(phrase) > 5:  # skip trivially short matches
+                found.append(phrase[:120])
+    return found
+
+
+def _record_commitments(reply: str) -> None:
+    """Scan a Timmy reply for commitments and store them."""
+    for phrase in _extract_commitments(reply):
+        # Avoid near-duplicate commitments
+        if any(c["text"] == phrase for c in _commitments):
+            continue
+        _commitments.append({"text": phrase, "created_at": time.time(), "messages_since": 0})
+        if len(_commitments) > _MAX_COMMITMENTS:
+            _commitments.pop(0)
+
+
+def _tick_commitments() -> None:
+    """Increment messages_since for every open commitment."""
+    for c in _commitments:
+        c["messages_since"] += 1
+
+
+def _build_commitment_context() -> str:
+    """Return a grounding note if any commitments are overdue for follow-up."""
+    overdue = [c for c in _commitments if c["messages_since"] >= _REMIND_AFTER]
+    if not overdue:
+        return ""
+    lines = [f"- {c['text']}" for c in overdue]
+    return (
+        "[Open commitments Timmy made earlier — "
+        "weave awareness naturally, don't list robotically]\n" + "\n".join(lines)
+    )
+
+
+def close_commitment(index: int) -> bool:
+    """Remove a commitment by index. Returns True if removed."""
+    if 0 <= index < len(_commitments):
+        _commitments.pop(index)
+        return True
+    return False
+
+
+def get_commitments() -> list[dict]:
+    """Return a copy of open commitments (for testing / API)."""
+    return list(_commitments)
+
+
+def reset_commitments() -> None:
+    """Clear all commitments (for testing / session reset)."""
+    _commitments.clear()
+

 def _read_presence_file() -> dict | None:
    """Read presence.json if it exists and is fresh enough."""
@@ -206,7 +284,9 @@ async def _bark_and_broadcast(visitor_text: str) -> None:
    """Generate a bark response and broadcast it to all Workshop clients."""
    await _broadcast(json.dumps({"type": "timmy_thinking"}))

+    _tick_commitments()
    reply = await _generate_bark(visitor_text)
+    _record_commitments(reply)

    _conversation.append({"visitor": visitor_text, "timmy": reply})

@@ -225,12 +305,18 @@ async def _generate_bark(visitor_text: str) -> str:
    """Generate a short in-character bark response.

    Uses the existing Timmy session with a dedicated workshop session ID.
+    Prepends commitment context when open commitments are overdue.
    Gracefully degrades to a canned response if inference fails.
    """
    try:
        from timmy import session as _session

-        response = await _session.chat(visitor_text, session_id=_WORKSHOP_SESSION_ID)
+        grounded = visitor_text
+        context = _build_commitment_context()
+        if context:
+            grounded = f"{context}\n{visitor_text}"
+
+        response = await _session.chat(grounded, session_id=_WORKSHOP_SESSION_ID)
        return response
    except Exception as exc:
        logger.warning("Bark generation failed: %s", exc)
--- a/tests/dashboard/test_world_api.py
+++ b/tests/dashboard/test_world_api.py
@@ -8,16 +8,25 @@ from unittest.mock import AsyncMock, MagicMock, patch
 import pytest

 from dashboard.routes.world import (
+    _REMIND_AFTER,
    _STALE_THRESHOLD,
    _bark_and_broadcast,
    _broadcast,
+    _build_commitment_context,
    _build_world_state,
+    _commitments,
    _conversation,
+    _extract_commitments,
    _generate_bark,
    _handle_client_message,
    _log_bark_failure,
    _read_presence_file,
+    _record_commitments,
+    _tick_commitments,
    broadcast_world_state,
+    close_commitment,
+    get_commitments,
+    reset_commitments,
 )

 # ---------------------------------------------------------------------------
@@ -396,3 +405,168 @@ def test_log_bark_failure_ignores_cancelled():
    task = MagicMock(spec=asyncio.Task)
    task.cancelled.return_value = True
    _log_bark_failure(task)  # should not raise
+
+
+# ---------------------------------------------------------------------------
+# Conversation grounding — commitment tracking (#322)
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=False)
+def _clean_commitments():
+    """Reset commitments before and after each commitment test."""
+    reset_commitments()
+    yield
+    reset_commitments()
+
+
+class TestExtractCommitments:
+    def test_extracts_ill_pattern(self):
+        text = "I'll draft the skeleton ticket in 30 minutes."
+        result = _extract_commitments(text)
+        assert len(result) == 1
+        assert "draft the skeleton ticket" in result[0]
+
+    def test_extracts_i_will_pattern(self):
+        result = _extract_commitments("I will review that PR tomorrow.")
+        assert len(result) == 1
+        assert "review that PR tomorrow" in result[0]
+
+    def test_extracts_let_me_pattern(self):
+        result = _extract_commitments("Let me write up a summary for you.")
+        assert len(result) == 1
+        assert "write up a summary" in result[0]
+
+    def test_skips_short_matches(self):
+        result = _extract_commitments("I'll do it.")
+        # "do it" is 5 chars — should be skipped (needs > 5)
+        assert result == []
+
+    def test_no_commitments_in_normal_text(self):
+        result = _extract_commitments("The weather is nice today.")
+        assert result == []
+
+    def test_truncates_long_commitments(self):
+        long_phrase = "a" * 200
+        result = _extract_commitments(f"I'll {long_phrase}.")
+        assert len(result) == 1
+        assert len(result[0]) == 120
+
+
+class TestRecordCommitments:
+    def test_records_new_commitment(self, _clean_commitments):
+        _record_commitments("I'll draft the ticket now.")
+        assert len(get_commitments()) == 1
+        assert get_commitments()[0]["messages_since"] == 0
+
+    def test_avoids_duplicate_commitments(self, _clean_commitments):
+        _record_commitments("I'll draft the ticket now.")
+        _record_commitments("I'll draft the ticket now.")
+        assert len(get_commitments()) == 1
+
+    def test_caps_at_max(self, _clean_commitments):
+        from dashboard.routes.world import _MAX_COMMITMENTS
+
+        for i in range(_MAX_COMMITMENTS + 3):
+            _record_commitments(f"I'll handle commitment number {i} right away.")
+        assert len(get_commitments()) <= _MAX_COMMITMENTS
+
+
+class TestTickAndContext:
+    def test_tick_increments_messages_since(self, _clean_commitments):
+        _commitments.append({"text": "write the docs", "created_at": 0, "messages_since": 0})
+        _tick_commitments()
+        _tick_commitments()
+        assert _commitments[0]["messages_since"] == 2
+
+    def test_context_empty_when_no_overdue(self, _clean_commitments):
+        _commitments.append({"text": "write the docs", "created_at": 0, "messages_since": 0})
+        assert _build_commitment_context() == ""
+
+    def test_context_surfaces_overdue_commitments(self, _clean_commitments):
+        _commitments.append(
+            {
+                "text": "draft the skeleton ticket",
+                "created_at": 0,
+                "messages_since": _REMIND_AFTER,
+            }
+        )
+        ctx = _build_commitment_context()
+        assert "draft the skeleton ticket" in ctx
+        assert "Open commitments" in ctx
+
+    def test_context_only_includes_overdue(self, _clean_commitments):
+        _commitments.append({"text": "recent thing", "created_at": 0, "messages_since": 1})
+        _commitments.append(
+            {
+                "text": "old thing",
+                "created_at": 0,
+                "messages_since": _REMIND_AFTER,
+            }
+        )
+        ctx = _build_commitment_context()
+        assert "old thing" in ctx
+        assert "recent thing" not in ctx
+
+
+class TestCloseCommitment:
+    def test_close_valid_index(self, _clean_commitments):
+        _commitments.append({"text": "write the docs", "created_at": 0, "messages_since": 0})
+        assert close_commitment(0) is True
+        assert len(get_commitments()) == 0
+
+    def test_close_invalid_index(self, _clean_commitments):
+        assert close_commitment(99) is False
+
+
+class TestGroundingIntegration:
+    @pytest.mark.asyncio
+    async def test_bark_records_commitments_from_reply(self, _clean_commitments):
+        from dashboard.routes.world import _ws_clients
+
+        ws = AsyncMock()
+        _ws_clients.append(ws)
+        _conversation.clear()
+        try:
+            with patch(
+                "timmy.session.chat",
+                new_callable=AsyncMock,
+                return_value="I'll draft the ticket for you!",
+            ):
+                await _bark_and_broadcast("Can you help?")
+
+            assert len(get_commitments()) == 1
+            assert "draft the ticket" in get_commitments()[0]["text"]
+        finally:
+            _ws_clients.clear()
+            _conversation.clear()
+
+    @pytest.mark.asyncio
+    async def test_bark_prepends_context_after_n_messages(self, _clean_commitments):
+        """After _REMIND_AFTER messages, commitment context is prepended."""
+        _commitments.append(
+            {
+                "text": "draft the skeleton ticket",
+                "created_at": 0,
+                "messages_since": _REMIND_AFTER - 1,
+            }
+        )
+
+        with patch(
+            "timmy.session.chat",
+            new_callable=AsyncMock,
+            return_value="Sure thing!",
+        ) as mock_chat:
+            # This tick will push messages_since to _REMIND_AFTER
+            await _generate_bark("Any updates?")
+            # _generate_bark doesn't tick — _bark_and_broadcast does.
+            # But we pre-set messages_since to _REMIND_AFTER - 1,
+            # so we need to tick once to make it overdue.
+            _tick_commitments()
+            await _generate_bark("Any updates?")
+
+            # Second call should have context prepended
+            last_call = mock_chat.call_args_list[-1]
+            sent_text = last_call[0][0]
+            assert "draft the skeleton ticket" in sent_text
+            assert "Open commitments" in sent_text