fix: conversation grounding to prevent topic drift in Workshop (#406)

Co-authored-by: Kimi Agent <kimi@timmy.local>
Co-committed-by: Kimi Agent <kimi@timmy.local>
This commit is contained in:
2026-03-19 02:39:15 -04:00
committed by hermes
parent 3571d528ad
commit b67dbe922f
2 changed files with 143 additions and 1 deletions

View File

@@ -45,6 +45,11 @@ _conversation: deque[dict] = deque(maxlen=_MAX_EXCHANGES)
# Session id passed to the Timmy chat session for Workshop bark generation.
_WORKSHOP_SESSION_ID = "workshop"
# Conversation grounding — anchor to opening topic so Timmy doesn't drift.
# Anchor text taken from a visitor message (capped at 120 characters by
# _refresh_ground), or None while no anchor is set.
_ground_topic: str | None = None
# time.time() value recorded by _refresh_ground; reset_conversation_ground
# puts it back to 0.0.
_ground_set_at: float = 0.0
_GROUND_TTL = 300 # seconds of inactivity before the anchor expires
def _read_presence_file() -> dict | None:
"""Read presence.json if it exists and is fresh enough."""
@@ -202,10 +207,32 @@ def _log_bark_failure(task: asyncio.Task) -> None:
logger.error("Bark task failed: %s", exc)
def reset_conversation_ground() -> None:
    """Forget the current grounding anchor (e.g. after inactivity).

    Both module-level grounding variables go back to their unset values:
    no topic and a zero timestamp.
    """
    global _ground_set_at, _ground_topic
    _ground_topic, _ground_set_at = None, 0.0
def _refresh_ground(visitor_text: str) -> None:
    """Set or refresh the conversation grounding anchor.

    The first non-blank visitor message in a session (or after the TTL
    expires) becomes the anchor topic, capped at 120 characters.
    Subsequent messages are grounded against it. Every call records the
    current time, so the TTL measures inactivity rather than anchor age.

    Args:
        visitor_text: Raw text of the visitor's message.
    """
    global _ground_topic, _ground_set_at
    now = time.time()
    expired = (now - _ground_set_at) > _GROUND_TTL
    # Falsiness (not just ``is None``) so a blank anchor can never block a
    # real topic from being picked up while the session stays active.
    if not _ground_topic or expired:
        candidate = visitor_text[:120]
        # A blank message carries no topic: leave the anchor unset so the
        # next meaningful message can claim it.
        _ground_topic = candidate if candidate.strip() else None
        if _ground_topic is not None:
            logger.debug("Ground topic set: %s", _ground_topic)
    _ground_set_at = now
async def _bark_and_broadcast(visitor_text: str) -> None:
"""Generate a bark response and broadcast it to all Workshop clients."""
await _broadcast(json.dumps({"type": "timmy_thinking"}))
_refresh_ground(visitor_text)
reply = await _generate_bark(visitor_text)
_conversation.append({"visitor": visitor_text, "timmy": reply})
@@ -225,12 +252,17 @@ async def _generate_bark(visitor_text: str) -> str:
"""Generate a short in-character bark response.
Uses the existing Timmy session with a dedicated workshop session ID.
When a grounding anchor exists, the opening topic is prepended so the
model stays on-topic across long sessions.
Gracefully degrades to a canned response if inference fails.
"""
try:
from timmy import session as _session
response = await _session.chat(visitor_text, session_id=_WORKSHOP_SESSION_ID)
grounded = visitor_text
if _ground_topic and visitor_text != _ground_topic:
grounded = f"[Workshop conversation topic: {_ground_topic}]\n{visitor_text}"
response = await _session.chat(grounded, session_id=_WORKSHOP_SESSION_ID)
return response
except Exception as exc:
logger.warning("Bark generation failed: %s", exc)

View File

@@ -8,6 +8,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from dashboard.routes.world import (
_GROUND_TTL,
_STALE_THRESHOLD,
_bark_and_broadcast,
_broadcast,
@@ -17,7 +18,9 @@ from dashboard.routes.world import (
_handle_client_message,
_log_bark_failure,
_read_presence_file,
_refresh_ground,
broadcast_world_state,
reset_conversation_ground,
)
# ---------------------------------------------------------------------------
@@ -280,6 +283,7 @@ async def test_handle_client_message_ignores_empty_text():
@pytest.mark.asyncio
async def test_generate_bark_returns_response():
"""_generate_bark returns the chat response."""
reset_conversation_ground()
with patch("timmy.session.chat", new_callable=AsyncMock) as mock_chat:
mock_chat.return_value = "Woof! Good to see you."
result = await _generate_bark("Hey Timmy!")
@@ -291,6 +295,7 @@ async def test_generate_bark_returns_response():
@pytest.mark.asyncio
async def test_generate_bark_fallback_on_error():
"""_generate_bark returns canned response when chat fails."""
reset_conversation_ground()
with patch(
"timmy.session.chat",
new_callable=AsyncMock,
@@ -309,6 +314,7 @@ async def test_bark_and_broadcast_sends_thinking_then_speech():
ws = AsyncMock()
_ws_clients.append(ws)
_conversation.clear()
reset_conversation_ground()
try:
with patch(
"timmy.session.chat",
@@ -355,6 +361,7 @@ async def test_conversation_buffer_caps_at_max():
ws = AsyncMock()
_ws_clients.append(ws)
_conversation.clear()
reset_conversation_ground()
try:
with patch(
"timmy.session.chat",
@@ -396,3 +403,106 @@ def test_log_bark_failure_ignores_cancelled():
task = MagicMock(spec=asyncio.Task)
task.cancelled.return_value = True
_log_bark_failure(task) # should not raise
# ---------------------------------------------------------------------------
# Conversation grounding (#322)
# ---------------------------------------------------------------------------
class TestConversationGrounding:
    """Grounding-anchor behaviour: the opening topic must survive follow-ups."""

    def setup_method(self):
        # Every test starts without an anchor.
        reset_conversation_ground()

    def teardown_method(self):
        # Do not leak an anchor into tests that run afterwards.
        reset_conversation_ground()

    def test_refresh_ground_sets_topic_on_first_message(self):
        """The opening visitor message is stored as the anchor."""
        import dashboard.routes.world as world

        opening = "Tell me about the Bible"
        _refresh_ground(opening)

        assert world._ground_topic == opening
        assert world._ground_set_at > 0

    def test_refresh_ground_keeps_topic_on_subsequent_messages(self):
        """A follow-up message leaves the existing anchor untouched."""
        import dashboard.routes.world as world

        for message in ("Tell me about the Bible", "What about Genesis?"):
            _refresh_ground(message)

        assert world._ground_topic == "Tell me about the Bible"

    def test_refresh_ground_resets_after_ttl(self):
        """Once _GROUND_TTL seconds pass without activity, a new anchor is taken."""
        import dashboard.routes.world as world

        _refresh_ground("Tell me about the Bible")
        # Backdate the timestamp so the anchor looks older than the TTL.
        world._ground_set_at = time.time() - _GROUND_TTL - 1
        _refresh_ground("Now tell me about cooking")

        assert world._ground_topic == "Now tell me about cooking"

    def test_refresh_ground_truncates_long_messages(self):
        """The stored anchor never exceeds 120 characters."""
        import dashboard.routes.world as world

        _refresh_ground("x" * 200)

        assert len(world._ground_topic) == 120

    def test_reset_conversation_ground_clears_state(self):
        """Resetting wipes both the topic and its timestamp."""
        import dashboard.routes.world as world

        _refresh_ground("Some topic")
        reset_conversation_ground()

        assert world._ground_topic is None
        assert world._ground_set_at == 0.0

    @pytest.mark.asyncio
    async def test_generate_bark_prepends_ground_topic(self):
        """With an anchor in place, the chat input carries the topic header."""
        _refresh_ground("Tell me about prayer")

        with patch(
            "timmy.session.chat",
            new_callable=AsyncMock,
            return_value="Great question!",
        ) as chat_stub:
            await _generate_bark("What else can you share?")

        sent_text = chat_stub.call_args.args[0]
        assert "[Workshop conversation topic: Tell me about prayer]" in sent_text
        assert "What else can you share?" in sent_text

    @pytest.mark.asyncio
    async def test_generate_bark_no_prefix_for_first_message(self):
        """The message that is itself the anchor goes through unprefixed."""
        _refresh_ground("Tell me about prayer")

        with patch(
            "timmy.session.chat",
            new_callable=AsyncMock,
            return_value="Sure!",
        ) as chat_stub:
            await _generate_bark("Tell me about prayer")

        sent_text = chat_stub.call_args.args[0]
        assert "[Workshop conversation topic:" not in sent_text
        assert sent_text == "Tell me about prayer"

    @pytest.mark.asyncio
    async def test_bark_and_broadcast_sets_ground(self):
        """Running the full bark pipeline establishes the anchor on its own."""
        import dashboard.routes.world as world
        from dashboard.routes.world import _ws_clients

        client = AsyncMock()
        _ws_clients.append(client)
        _conversation.clear()
        try:
            with patch(
                "timmy.session.chat",
                new_callable=AsyncMock,
                return_value="Interesting!",
            ):
                await _bark_and_broadcast("What is grace?")
            assert world._ground_topic == "What is grace?"
        finally:
            # Shared module state: always restore it, even on failure.
            _ws_clients.clear()
            _conversation.clear()