fix: add commitment tracking to prevent topic drift in Workshop sessions
Tracks action commitments Timmy makes during Workshop conversations (e.g. "I'll draft the ticket"). After 5 messages without follow-up, open commitments are surfaced as grounding context to the LLM. Commitments can be closed programmatically via close_commitment(). Fixes #322 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -17,6 +17,7 @@ or missing.
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from collections import deque
|
||||
from datetime import UTC, datetime
|
||||
@@ -45,6 +46,83 @@ _conversation: deque[dict] = deque(maxlen=_MAX_EXCHANGES)
|
||||
|
||||
_WORKSHOP_SESSION_ID = "workshop"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Conversation grounding — commitment tracking (issue #322)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Patterns that indicate Timmy is committing to an action.
|
||||
_COMMITMENT_PATTERNS: list[re.Pattern[str]] = [
|
||||
re.compile(r"I'll (.+?)(?:\.|!|\?|$)", re.IGNORECASE),
|
||||
re.compile(r"I will (.+?)(?:\.|!|\?|$)", re.IGNORECASE),
|
||||
re.compile(r"[Ll]et me (.+?)(?:\.|!|\?|$)", re.IGNORECASE),
|
||||
]
|
||||
|
||||
# After this many messages without follow-up, surface open commitments.
|
||||
_REMIND_AFTER = 5
|
||||
_MAX_COMMITMENTS = 10
|
||||
|
||||
# In-memory list of open commitments.
|
||||
# Each entry: {"text": str, "created_at": float, "messages_since": int}
|
||||
_commitments: list[dict] = []
|
||||
|
||||
|
||||
def _extract_commitments(text: str) -> list[str]:
|
||||
"""Pull commitment phrases from Timmy's reply text."""
|
||||
found: list[str] = []
|
||||
for pattern in _COMMITMENT_PATTERNS:
|
||||
for match in pattern.finditer(text):
|
||||
phrase = match.group(1).strip()
|
||||
if len(phrase) > 5: # skip trivially short matches
|
||||
found.append(phrase[:120])
|
||||
return found
|
||||
|
||||
|
||||
def _record_commitments(reply: str) -> None:
|
||||
"""Scan a Timmy reply for commitments and store them."""
|
||||
for phrase in _extract_commitments(reply):
|
||||
# Avoid near-duplicate commitments
|
||||
if any(c["text"] == phrase for c in _commitments):
|
||||
continue
|
||||
_commitments.append({"text": phrase, "created_at": time.time(), "messages_since": 0})
|
||||
if len(_commitments) > _MAX_COMMITMENTS:
|
||||
_commitments.pop(0)
|
||||
|
||||
|
||||
def _tick_commitments() -> None:
|
||||
"""Increment messages_since for every open commitment."""
|
||||
for c in _commitments:
|
||||
c["messages_since"] += 1
|
||||
|
||||
|
||||
def _build_commitment_context() -> str:
|
||||
"""Return a grounding note if any commitments are overdue for follow-up."""
|
||||
overdue = [c for c in _commitments if c["messages_since"] >= _REMIND_AFTER]
|
||||
if not overdue:
|
||||
return ""
|
||||
lines = [f"- {c['text']}" for c in overdue]
|
||||
return (
|
||||
"[Open commitments Timmy made earlier — "
|
||||
"weave awareness naturally, don't list robotically]\n" + "\n".join(lines)
|
||||
)
|
||||
|
||||
|
||||
def close_commitment(index: int) -> bool:
|
||||
"""Remove a commitment by index. Returns True if removed."""
|
||||
if 0 <= index < len(_commitments):
|
||||
_commitments.pop(index)
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def get_commitments() -> list[dict]:
|
||||
"""Return a copy of open commitments (for testing / API)."""
|
||||
return list(_commitments)
|
||||
|
||||
|
||||
def reset_commitments() -> None:
|
||||
"""Clear all commitments (for testing / session reset)."""
|
||||
_commitments.clear()
|
||||
|
||||
|
||||
def _read_presence_file() -> dict | None:
|
||||
"""Read presence.json if it exists and is fresh enough."""
|
||||
@@ -206,7 +284,9 @@ async def _bark_and_broadcast(visitor_text: str) -> None:
|
||||
"""Generate a bark response and broadcast it to all Workshop clients."""
|
||||
await _broadcast(json.dumps({"type": "timmy_thinking"}))
|
||||
|
||||
_tick_commitments()
|
||||
reply = await _generate_bark(visitor_text)
|
||||
_record_commitments(reply)
|
||||
|
||||
_conversation.append({"visitor": visitor_text, "timmy": reply})
|
||||
|
||||
@@ -225,12 +305,18 @@ async def _generate_bark(visitor_text: str) -> str:
|
||||
"""Generate a short in-character bark response.
|
||||
|
||||
Uses the existing Timmy session with a dedicated workshop session ID.
|
||||
Prepends commitment context when open commitments are overdue.
|
||||
Gracefully degrades to a canned response if inference fails.
|
||||
"""
|
||||
try:
|
||||
from timmy import session as _session
|
||||
|
||||
response = await _session.chat(visitor_text, session_id=_WORKSHOP_SESSION_ID)
|
||||
grounded = visitor_text
|
||||
context = _build_commitment_context()
|
||||
if context:
|
||||
grounded = f"{context}\n{visitor_text}"
|
||||
|
||||
response = await _session.chat(grounded, session_id=_WORKSHOP_SESSION_ID)
|
||||
return response
|
||||
except Exception as exc:
|
||||
logger.warning("Bark generation failed: %s", exc)
|
||||
|
||||
@@ -8,16 +8,25 @@ from unittest.mock import AsyncMock, MagicMock, patch
|
||||
import pytest
|
||||
|
||||
from dashboard.routes.world import (
|
||||
_REMIND_AFTER,
|
||||
_STALE_THRESHOLD,
|
||||
_bark_and_broadcast,
|
||||
_broadcast,
|
||||
_build_commitment_context,
|
||||
_build_world_state,
|
||||
_commitments,
|
||||
_conversation,
|
||||
_extract_commitments,
|
||||
_generate_bark,
|
||||
_handle_client_message,
|
||||
_log_bark_failure,
|
||||
_read_presence_file,
|
||||
_record_commitments,
|
||||
_tick_commitments,
|
||||
broadcast_world_state,
|
||||
close_commitment,
|
||||
get_commitments,
|
||||
reset_commitments,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -396,3 +405,168 @@ def test_log_bark_failure_ignores_cancelled():
|
||||
task = MagicMock(spec=asyncio.Task)
|
||||
task.cancelled.return_value = True
|
||||
_log_bark_failure(task) # should not raise
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Conversation grounding — commitment tracking (#322)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(autouse=False)
|
||||
def _clean_commitments():
|
||||
"""Reset commitments before and after each commitment test."""
|
||||
reset_commitments()
|
||||
yield
|
||||
reset_commitments()
|
||||
|
||||
|
||||
class TestExtractCommitments:
|
||||
def test_extracts_ill_pattern(self):
|
||||
text = "I'll draft the skeleton ticket in 30 minutes."
|
||||
result = _extract_commitments(text)
|
||||
assert len(result) == 1
|
||||
assert "draft the skeleton ticket" in result[0]
|
||||
|
||||
def test_extracts_i_will_pattern(self):
|
||||
result = _extract_commitments("I will review that PR tomorrow.")
|
||||
assert len(result) == 1
|
||||
assert "review that PR tomorrow" in result[0]
|
||||
|
||||
def test_extracts_let_me_pattern(self):
|
||||
result = _extract_commitments("Let me write up a summary for you.")
|
||||
assert len(result) == 1
|
||||
assert "write up a summary" in result[0]
|
||||
|
||||
def test_skips_short_matches(self):
|
||||
result = _extract_commitments("I'll do it.")
|
||||
# "do it" is 5 chars — should be skipped (needs > 5)
|
||||
assert result == []
|
||||
|
||||
def test_no_commitments_in_normal_text(self):
|
||||
result = _extract_commitments("The weather is nice today.")
|
||||
assert result == []
|
||||
|
||||
def test_truncates_long_commitments(self):
|
||||
long_phrase = "a" * 200
|
||||
result = _extract_commitments(f"I'll {long_phrase}.")
|
||||
assert len(result) == 1
|
||||
assert len(result[0]) == 120
|
||||
|
||||
|
||||
class TestRecordCommitments:
|
||||
def test_records_new_commitment(self, _clean_commitments):
|
||||
_record_commitments("I'll draft the ticket now.")
|
||||
assert len(get_commitments()) == 1
|
||||
assert get_commitments()[0]["messages_since"] == 0
|
||||
|
||||
def test_avoids_duplicate_commitments(self, _clean_commitments):
|
||||
_record_commitments("I'll draft the ticket now.")
|
||||
_record_commitments("I'll draft the ticket now.")
|
||||
assert len(get_commitments()) == 1
|
||||
|
||||
def test_caps_at_max(self, _clean_commitments):
|
||||
from dashboard.routes.world import _MAX_COMMITMENTS
|
||||
|
||||
for i in range(_MAX_COMMITMENTS + 3):
|
||||
_record_commitments(f"I'll handle commitment number {i} right away.")
|
||||
assert len(get_commitments()) <= _MAX_COMMITMENTS
|
||||
|
||||
|
||||
class TestTickAndContext:
|
||||
def test_tick_increments_messages_since(self, _clean_commitments):
|
||||
_commitments.append({"text": "write the docs", "created_at": 0, "messages_since": 0})
|
||||
_tick_commitments()
|
||||
_tick_commitments()
|
||||
assert _commitments[0]["messages_since"] == 2
|
||||
|
||||
def test_context_empty_when_no_overdue(self, _clean_commitments):
|
||||
_commitments.append({"text": "write the docs", "created_at": 0, "messages_since": 0})
|
||||
assert _build_commitment_context() == ""
|
||||
|
||||
def test_context_surfaces_overdue_commitments(self, _clean_commitments):
|
||||
_commitments.append(
|
||||
{
|
||||
"text": "draft the skeleton ticket",
|
||||
"created_at": 0,
|
||||
"messages_since": _REMIND_AFTER,
|
||||
}
|
||||
)
|
||||
ctx = _build_commitment_context()
|
||||
assert "draft the skeleton ticket" in ctx
|
||||
assert "Open commitments" in ctx
|
||||
|
||||
def test_context_only_includes_overdue(self, _clean_commitments):
|
||||
_commitments.append({"text": "recent thing", "created_at": 0, "messages_since": 1})
|
||||
_commitments.append(
|
||||
{
|
||||
"text": "old thing",
|
||||
"created_at": 0,
|
||||
"messages_since": _REMIND_AFTER,
|
||||
}
|
||||
)
|
||||
ctx = _build_commitment_context()
|
||||
assert "old thing" in ctx
|
||||
assert "recent thing" not in ctx
|
||||
|
||||
|
||||
class TestCloseCommitment:
|
||||
def test_close_valid_index(self, _clean_commitments):
|
||||
_commitments.append({"text": "write the docs", "created_at": 0, "messages_since": 0})
|
||||
assert close_commitment(0) is True
|
||||
assert len(get_commitments()) == 0
|
||||
|
||||
def test_close_invalid_index(self, _clean_commitments):
|
||||
assert close_commitment(99) is False
|
||||
|
||||
|
||||
class TestGroundingIntegration:
|
||||
@pytest.mark.asyncio
|
||||
async def test_bark_records_commitments_from_reply(self, _clean_commitments):
|
||||
from dashboard.routes.world import _ws_clients
|
||||
|
||||
ws = AsyncMock()
|
||||
_ws_clients.append(ws)
|
||||
_conversation.clear()
|
||||
try:
|
||||
with patch(
|
||||
"timmy.session.chat",
|
||||
new_callable=AsyncMock,
|
||||
return_value="I'll draft the ticket for you!",
|
||||
):
|
||||
await _bark_and_broadcast("Can you help?")
|
||||
|
||||
assert len(get_commitments()) == 1
|
||||
assert "draft the ticket" in get_commitments()[0]["text"]
|
||||
finally:
|
||||
_ws_clients.clear()
|
||||
_conversation.clear()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_bark_prepends_context_after_n_messages(self, _clean_commitments):
|
||||
"""After _REMIND_AFTER messages, commitment context is prepended."""
|
||||
_commitments.append(
|
||||
{
|
||||
"text": "draft the skeleton ticket",
|
||||
"created_at": 0,
|
||||
"messages_since": _REMIND_AFTER - 1,
|
||||
}
|
||||
)
|
||||
|
||||
with patch(
|
||||
"timmy.session.chat",
|
||||
new_callable=AsyncMock,
|
||||
return_value="Sure thing!",
|
||||
) as mock_chat:
|
||||
# This tick will push messages_since to _REMIND_AFTER
|
||||
await _generate_bark("Any updates?")
|
||||
# _generate_bark doesn't tick — _bark_and_broadcast does.
|
||||
# But we pre-set messages_since to _REMIND_AFTER - 1,
|
||||
# so we need to tick once to make it overdue.
|
||||
_tick_commitments()
|
||||
await _generate_bark("Any updates?")
|
||||
|
||||
# Second call should have context prepended
|
||||
last_call = mock_chat.call_args_list[-1]
|
||||
sent_text = last_call[0][0]
|
||||
assert "draft the skeleton ticket" in sent_text
|
||||
assert "Open commitments" in sent_text
|
||||
|
||||
Reference in New Issue
Block a user