fix(gateway): seed DM thread sessions with parent transcript to preserve context

This commit is contained in:
Gary Chiu
2026-04-01 22:33:52 -04:00
committed by Teknium
parent e94b4b2b40
commit 8cb3596939
2 changed files with 258 additions and 0 deletions

View File

@@ -738,6 +738,43 @@ class SessionStore:
except Exception as e:
print(f"[gateway] Warning: Failed to create SQLite session: {e}")
# Seed new DM thread sessions with parent DM session history.
# When a bot reply creates a Slack thread and the user responds in it,
# the thread gets a new session (keyed by thread_ts). Without seeding,
# the thread session starts with zero context — the user's original
# question and the bot's answer are invisible. Fix: copy the parent
# DM session's transcript into the new thread session so context carries
# over while still keeping threads isolated from each other.
if (
source.chat_type == "dm"
and source.thread_id
and entry.created_at == entry.updated_at # brand-new session
and not was_auto_reset
):
parent_key = build_session_key(
SessionSource(
platform=source.platform,
chat_id=source.chat_id,
chat_type="dm",
user_id=source.user_id,
# no thread_id — this is the parent DM session
),
group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True),
)
with self._lock:
parent_entry = self._entries.get(parent_key)
if parent_entry and parent_entry.session_id != entry.session_id:
try:
parent_history = self.load_transcript(parent_entry.session_id)
if parent_history:
self.rewrite_transcript(entry.session_id, parent_history)
logger.info(
"[Session] Seeded DM thread session %s with %d messages from parent %s",
entry.session_id, len(parent_history), parent_entry.session_id,
)
except Exception as e:
logger.warning("[Session] Failed to seed thread session: %s", e)
return entry
def update_session(

View File

@@ -0,0 +1,221 @@
"""Tests for DM thread session seeding.
When a bot reply creates a thread in a DM (e.g. Slack), the user's reply
in that thread gets a new session (keyed by thread_ts). The seeding logic
copies the parent DM session's transcript into the new thread session so
the bot retains context of the original conversation.
Covers:
- Basic seeding: parent transcript copied to new thread session
- No seeding for group/channel chats
- No seeding when parent session doesn't exist
- No seeding on auto-reset sessions
- No seeding on existing (non-new) thread sessions
- Parent transcript is not mutated by seeding
- Multiple threads from same parent each get independent copies
- Cross-platform: works for any platform with DM threads (Slack, Telegram, Discord)
"""
import pytest
from unittest.mock import patch
from gateway.config import Platform, GatewayConfig
from gateway.session import SessionSource, SessionStore, build_session_key
@pytest.fixture()
def store(tmp_path):
"""SessionStore with no SQLite, for fast unit tests."""
config = GatewayConfig()
with patch("gateway.session.SessionStore._ensure_loaded"):
s = SessionStore(sessions_dir=tmp_path, config=config)
s._db = None
s._loaded = True
return s
def _dm_source(platform=Platform.SLACK, chat_id="D123", thread_id=None, user_id="U1"):
return SessionSource(
platform=platform,
chat_id=chat_id,
chat_type="dm",
user_id=user_id,
thread_id=thread_id,
)
def _group_source(platform=Platform.SLACK, chat_id="C456", thread_id=None, user_id="U1"):
return SessionSource(
platform=platform,
chat_id=chat_id,
chat_type="group",
user_id=user_id,
thread_id=thread_id,
)
PARENT_HISTORY = [
{"role": "user", "content": "What's the weather?"},
{"role": "assistant", "content": "It's sunny and 72°F."},
]
class TestDMThreadSeeding:
"""Core seeding behavior."""
def test_thread_session_seeded_from_parent(self, store):
"""New DM thread session should contain the parent's transcript."""
# Create parent DM session with history
parent_source = _dm_source()
parent_entry = store.get_or_create_session(parent_source)
for msg in PARENT_HISTORY:
store.append_to_transcript(parent_entry.session_id, msg)
# Create thread session (user replied in thread)
thread_source = _dm_source(thread_id="1234567890.000001")
thread_entry = store.get_or_create_session(thread_source)
# Thread should have parent's history
thread_transcript = store.load_transcript(thread_entry.session_id)
assert len(thread_transcript) == 2
assert thread_transcript[0]["content"] == "What's the weather?"
assert thread_transcript[1]["content"] == "It's sunny and 72°F."
def test_parent_transcript_not_mutated(self, store):
"""Seeding should not alter the parent session's transcript."""
parent_source = _dm_source()
parent_entry = store.get_or_create_session(parent_source)
for msg in PARENT_HISTORY:
store.append_to_transcript(parent_entry.session_id, msg)
# Create thread and add a message to it
thread_source = _dm_source(thread_id="1234567890.000001")
thread_entry = store.get_or_create_session(thread_source)
store.append_to_transcript(thread_entry.session_id, {
"role": "user", "content": "thread-only message"
})
# Parent should still have only its original messages
parent_transcript = store.load_transcript(parent_entry.session_id)
assert len(parent_transcript) == 2
assert all(m["content"] != "thread-only message" for m in parent_transcript)
def test_multiple_threads_get_independent_copies(self, store):
"""Each thread from the same parent gets its own copy."""
parent_source = _dm_source()
parent_entry = store.get_or_create_session(parent_source)
for msg in PARENT_HISTORY:
store.append_to_transcript(parent_entry.session_id, msg)
# Thread A
thread_a_source = _dm_source(thread_id="1111.000001")
thread_a_entry = store.get_or_create_session(thread_a_source)
store.append_to_transcript(thread_a_entry.session_id, {
"role": "user", "content": "thread A message"
})
# Thread B
thread_b_source = _dm_source(thread_id="2222.000002")
thread_b_entry = store.get_or_create_session(thread_b_source)
# Thread B should have parent history, not thread A's additions
thread_b_transcript = store.load_transcript(thread_b_entry.session_id)
assert len(thread_b_transcript) == 2
assert all(m["content"] != "thread A message" for m in thread_b_transcript)
# Thread A should have parent history + its own message
thread_a_transcript = store.load_transcript(thread_a_entry.session_id)
assert len(thread_a_transcript) == 3
def test_existing_thread_session_not_reseeded(self, store):
"""Returning to an existing thread session should not re-copy parent history."""
parent_source = _dm_source()
parent_entry = store.get_or_create_session(parent_source)
for msg in PARENT_HISTORY:
store.append_to_transcript(parent_entry.session_id, msg)
# Create thread session
thread_source = _dm_source(thread_id="1234567890.000001")
thread_entry = store.get_or_create_session(thread_source)
store.append_to_transcript(thread_entry.session_id, {
"role": "user", "content": "follow-up"
})
# Add more to parent after thread was created
store.append_to_transcript(parent_entry.session_id, {
"role": "user", "content": "new parent message"
})
# Get the same thread session again (not new — created_at != updated_at)
thread_entry_again = store.get_or_create_session(thread_source)
assert thread_entry_again.session_id == thread_entry.session_id
# Should still have 3 messages (2 seeded + 1 follow-up), not re-seeded
thread_transcript = store.load_transcript(thread_entry_again.session_id)
assert len(thread_transcript) == 3
assert thread_transcript[2]["content"] == "follow-up"
class TestDMThreadSeedingEdgeCases:
"""Edge cases and conditions where seeding should NOT happen."""
def test_no_seeding_for_group_threads(self, store):
"""Group/channel threads should not trigger seeding."""
parent_source = _group_source()
parent_entry = store.get_or_create_session(parent_source)
for msg in PARENT_HISTORY:
store.append_to_transcript(parent_entry.session_id, msg)
thread_source = _group_source(thread_id="1234567890.000001")
thread_entry = store.get_or_create_session(thread_source)
thread_transcript = store.load_transcript(thread_entry.session_id)
assert len(thread_transcript) == 0
def test_no_seeding_without_parent_session(self, store):
"""Thread session without a parent DM session should start empty."""
thread_source = _dm_source(thread_id="1234567890.000001")
thread_entry = store.get_or_create_session(thread_source)
thread_transcript = store.load_transcript(thread_entry.session_id)
assert len(thread_transcript) == 0
def test_no_seeding_with_empty_parent(self, store):
"""If parent session exists but has no transcript, thread starts empty."""
parent_source = _dm_source()
store.get_or_create_session(parent_source)
# No messages appended to parent
thread_source = _dm_source(thread_id="1234567890.000001")
thread_entry = store.get_or_create_session(thread_source)
thread_transcript = store.load_transcript(thread_entry.session_id)
assert len(thread_transcript) == 0
def test_no_seeding_for_dm_without_thread_id(self, store):
"""Top-level DMs (no thread_id) should not trigger seeding."""
source = _dm_source()
entry = store.get_or_create_session(source)
# Should just be a normal empty session
transcript = store.load_transcript(entry.session_id)
assert len(transcript) == 0
class TestDMThreadSeedingCrossPlatform:
"""Verify seeding works for platforms beyond Slack."""
@pytest.mark.parametrize("platform", [Platform.SLACK, Platform.TELEGRAM, Platform.DISCORD])
def test_seeding_works_across_platforms(self, store, platform):
"""DM thread seeding should work for any platform that uses thread_id."""
parent_source = _dm_source(platform=platform)
parent_entry = store.get_or_create_session(parent_source)
for msg in PARENT_HISTORY:
store.append_to_transcript(parent_entry.session_id, msg)
thread_source = _dm_source(platform=platform, thread_id="thread_123")
thread_entry = store.get_or_create_session(thread_source)
thread_transcript = store.load_transcript(thread_entry.session_id)
assert len(thread_transcript) == 2
assert thread_transcript[0]["content"] == "What's the weather?"