# compounding-intelligence/tests/test_session_pair_harvester.py

"""
Tests for session_pair_harvester — training pair extraction from sessions.
"""

import json
import tempfile
import unittest
from pathlib import Path

import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))
from session_pair_harvester import (
    extract_pairs_from_conversation,
    extract_from_jsonl_file,
    deduplicate_pairs,
    compute_hash,
)


class TestSessionPairHarvester(unittest.TestCase):
    """Unit and integration tests for the session pair harvester helpers.

    Covers ``compute_hash``, ``extract_pairs_from_conversation``,
    ``deduplicate_pairs`` and ``extract_from_jsonl_file``.
    """

    def test_compute_hash_consistent(self):
        """Hashing the same text twice gives the same 16-character digest."""
        h1 = compute_hash("hello world")
        h2 = compute_hash("hello world")
        self.assertEqual(h1, h2)
        self.assertEqual(len(h1), 16)

    def test_extract_simple_qa_pair(self):
        """A simple user→assistant exchange produces one pair."""
        conversation = [
            {"role": "user", "content": "What is the capital of France?"},
            {"role": "assistant", "content": "The capital of France is Paris. It is a major European city renowned for its art, fashion, gastronomy, cultural heritage, and historical significance. The city attracts millions of tourists annually."},
        ]
        pairs = extract_pairs_from_conversation(conversation, "test_session", "test-model")
        self.assertEqual(len(pairs), 1)
        self.assertEqual(pairs[0]["terse"], "What is the capital of France?")
        self.assertIn("Paris", pairs[0]["rich"])
        self.assertEqual(pairs[0]["source"], "test_session")

    def test_min_ratio_filter(self):
        """Responses that are not longer than the prompt are filtered out."""
        conversation = [
            {
                "role": "user",
                "content": "Please explain in detail how the deployment pipeline handles rollbacks",
            },
            {"role": "assistant", "content": "It rolls back automatically."},
        ]
        # The reply has four words, so it clears min_response_words=3 and the
        # word-count filter cannot be what rejects it. It is shorter than the
        # prompt in both words and characters, so only the ratio filter can
        # drop it. min_ratio is passed explicitly so the test does not depend
        # on the harvester's default.
        pairs = extract_pairs_from_conversation(
            conversation, "s", "m", min_ratio=1.5, min_response_words=3
        )
        self.assertEqual(len(pairs), 0)

    def test_min_words_filter(self):
        """Assistant responses below min word count are skipped."""
        conversation = [
            {"role": "user", "content": "Explain the project architecture in detail"},
            {"role": "assistant", "content": "OK."},
        ]
        pairs = extract_pairs_from_conversation(conversation, "s", "m", min_response_words=5)
        self.assertEqual(len(pairs), 0)

    def test_skip_non_assistant_messages(self):
        """A leading system message does not produce or disturb a pair."""
        conversation = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi there! How can I help you today?"},
        ]
        pairs = extract_pairs_from_conversation(conversation, "s", "m", min_response_words=3)
        self.assertEqual(len(pairs), 1)
        # The pair's prompt is the user turn, not the system prompt.
        self.assertEqual(pairs[0]["terse"], "Hello")

    def test_multiple_pairs_from_one_session(self):
        """A conversation with several Q&A turns yields multiple pairs."""
        conversation = [
            {"role": "user", "content": "First question?"},
            {"role": "assistant", "content": "Here is a detailed and comprehensive answer that thoroughly explores multiple aspects of the subject. It provides background context and practical implications for the reader."},
            {"role": "user", "content": "Second?"},
            {"role": "assistant", "content": "Another comprehensive response with detailed examples. This includes practical code blocks and thorough explanations to ensure deep understanding of the topic at hand."},
        ]
        pairs = extract_pairs_from_conversation(conversation, "s", "m", min_ratio=1.0)
        self.assertEqual(len(pairs), 2)

    def test_deduplication_removes_duplicates(self):
        """Identical pairs across sessions are deduplicated."""
        pairs = [
            {"terse": "q1", "rich": "a1", "source": "s1", "model": "m"},
            {"terse": "q1", "rich": "a1", "source": "s2", "model": "m"},
            {"terse": "q2", "rich": "a2", "source": "s1", "model": "m"},
        ]
        unique = deduplicate_pairs(pairs)
        self.assertEqual(len(unique), 2)
        # Check the surviving content rather than the sources: the q2 pair is
        # itself from "s1", so a source-membership check would pass even if
        # the wrong entries were kept.
        self.assertEqual({p["terse"] for p in unique}, {"q1", "q2"})
        # The q1 survivor may come from either s1 or s2; q2 always brings s1.
        self.assertIn("s1", {p["source"] for p in unique})

    def test_integration_with_test_sessions(self):
        """Harvester finds pairs in real test session files."""
        repo_root = Path(__file__).parent.parent
        test_sessions_dir = repo_root / "test_sessions"
        if not test_sessions_dir.exists():
            self.skipTest("test_sessions not found")

        # Sorted so files are processed in a deterministic order.
        pairs = []
        for jsonl_file in sorted(test_sessions_dir.glob("*.jsonl")):
            pairs.extend(extract_from_jsonl_file(str(jsonl_file)))

        self.assertGreater(len(pairs), 0, "Should extract at least one pair from test_sessions")
        for p in pairs:
            # Every harvested pair must carry the four expected keys.
            for key in ("terse", "rich", "source", "model"):
                self.assertIn(key, p)
            # Verify content exists
            self.assertGreater(len(p["terse"]), 0)
            self.assertGreater(len(p["rich"]), 0)


# Allow running this file directly (python test_session_pair_harvester.py)
# in addition to discovery by a test runner.
if __name__ == "__main__":
    unittest.main()