Some checks failed
Test / pytest (pull_request) Failing after 8s
- Harvester used old message fields (from/value) but Hermes sessions use role/content - Import session_reader to normalize conversations properly - Update extract function to operate on normalized role/content messages - Change predecessor lookup from "human"/"gpt" to "user"/"assistant" - Add comprehensive smoke tests (8 tests, all pass) - Verify extraction from test_sessions: 11 pairs, avg ratio 8.13
119 lines
5.1 KiB
Python
119 lines
5.1 KiB
Python
"""
|
|
Tests for session_pair_harvester — training pair extraction from sessions.
|
|
"""
|
|
|
|
import json
|
|
import tempfile
|
|
import unittest
|
|
from pathlib import Path
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))
|
|
from session_pair_harvester import (
|
|
extract_pairs_from_conversation,
|
|
extract_from_jsonl_file,
|
|
deduplicate_pairs,
|
|
compute_hash,
|
|
)
|
|
|
|
|
|
class TestSessionPairHarvester(unittest.TestCase):
|
|
def test_compute_hash_consistent(self):
|
|
h1 = compute_hash("hello world")
|
|
h2 = compute_hash("hello world")
|
|
self.assertEqual(h1, h2)
|
|
self.assertEqual(len(h1), 16)
|
|
|
|
def test_extract_simple_qa_pair(self):
|
|
"""A simple user→assistant exchange produces one pair."""
|
|
conversation = [
|
|
{"role": "user", "content": "What is the capital of France?"},
|
|
{"role": "assistant", "content": "The capital of France is Paris. It is a major European city renowned for its art, fashion, gastronomy, cultural heritage, and historical significance. The city attracts millions of tourists annually."},
|
|
]
|
|
pairs = extract_pairs_from_conversation(conversation, "test_session", "test-model")
|
|
self.assertEqual(len(pairs), 1)
|
|
self.assertEqual(pairs[0]["terse"], "What is the capital of France?")
|
|
self.assertIn("Paris", pairs[0]["rich"])
|
|
self.assertEqual(pairs[0]["source"], "test_session")
|
|
|
|
def test_min_ratio_filter(self):
|
|
"""Very short responses are filtered out."""
|
|
conversation = [
|
|
{"role": "user", "content": "Yes"},
|
|
{"role": "assistant", "content": "No."},
|
|
]
|
|
# Default min_ratio = 1.5, min_words = 20 for response
|
|
pairs = extract_pairs_from_conversation(conversation, "s", "m", min_response_words=3)
|
|
self.assertEqual(len(pairs), 0)
|
|
|
|
def test_min_words_filter(self):
|
|
"""Assistant responses below min word count are skipped."""
|
|
conversation = [
|
|
{"role": "user", "content": "Explain the project architecture in detail"},
|
|
{"role": "assistant", "content": "OK."},
|
|
]
|
|
pairs = extract_pairs_from_conversation(conversation, "s", "m", min_response_words=5)
|
|
self.assertEqual(len(pairs), 0)
|
|
|
|
def test_skip_non_assistant_messages(self):
|
|
"""System and tool messages are ignored."""
|
|
conversation = [
|
|
{"role": "system", "content": "You are a helpful assistant."},
|
|
{"role": "user", "content": "Hello"},
|
|
{"role": "assistant", "content": "Hi there! How can I help you today?"},
|
|
]
|
|
pairs = extract_pairs_from_conversation(conversation, "s", "m", min_response_words=3)
|
|
self.assertEqual(len(pairs), 1)
|
|
self.assertEqual(pairs[0]["terse"], "Hello")
|
|
|
|
def test_multiple_pairs_from_one_session(self):
|
|
"""A conversation with several Q&A turns yields multiple pairs."""
|
|
conversation = [
|
|
{"role": "user", "content": "First question?"},
|
|
{"role": "assistant", "content": "Here is a detailed and comprehensive answer that thoroughly explores multiple aspects of the subject. It provides background context and practical implications for the reader."},
|
|
{"role": "user", "content": "Second?"},
|
|
{"role": "assistant", "content": "Another comprehensive response with detailed examples. This includes practical code blocks and thorough explanations to ensure deep understanding of the topic at hand."},
|
|
]
|
|
pairs = extract_pairs_from_conversation(conversation, "s", "m", min_ratio=1.0)
|
|
self.assertEqual(len(pairs), 2)
|
|
|
|
def test_deduplication_removes_duplicates(self):
|
|
"""Identical pairs across sessions are deduplicated."""
|
|
pairs = [
|
|
{"terse": "q1", "rich": "a1", "source": "s1", "model": "m"},
|
|
{"terse": "q1", "rich": "a1", "source": "s2", "model": "m"},
|
|
{"terse": "q2", "rich": "a2", "source": "s1", "model": "m"},
|
|
]
|
|
unique = deduplicate_pairs(pairs)
|
|
self.assertEqual(len(unique), 2)
|
|
sources = {p["source"] for p in unique}
|
|
# First unique pair can be from either s1 or s2
|
|
self.assertIn("s1", sources)
|
|
|
|
def test_integration_with_test_sessions(self):
|
|
"""Harvester finds pairs in real test session files."""
|
|
repo_root = Path(__file__).parent.parent
|
|
test_sessions_dir = repo_root / "test_sessions"
|
|
if not test_sessions_dir.exists():
|
|
self.skipTest("test_sessions not found")
|
|
|
|
pairs = []
|
|
for jsonl_file in sorted(test_sessions_dir.glob("*.jsonl")):
|
|
pairs.extend(extract_from_jsonl_file(str(jsonl_file)))
|
|
|
|
self.assertGreater(len(pairs), 0, "Should extract at least one pair from test_sessions")
|
|
for p in pairs:
|
|
self.assertIn("terse", p)
|
|
self.assertIn("rich", p)
|
|
self.assertIn("source", p)
|
|
self.assertIn("model", p)
|
|
# Verify content exists
|
|
self.assertGreater(len(p["terse"]), 0)
|
|
self.assertGreater(len(p["rich"]), 0)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
unittest.main()
|
|
|