Some checks failed
Test / pytest (pull_request) Failing after 7s
Add scripts/knowledge_to_training_pairs.py, which reads quality-gated knowledge entries from knowledge/index.json and emits terse→rich training pairs in JSONL format.

Features:
- Derives terse queries from facts via category-aware heuristics
- Configurable quality filters: min-confidence, model-filter, date range
- Output includes domain, source_confidence, source_model
- Smoke tests added in tests/test_knowledge_to_training_pairs.py

Deliverables for #199:
1. Pipeline script: scripts/knowledge_to_training_pairs.py
2. End-to-end: knowledge/index.json → training_pairs.jsonl (or custom JSONL)
3. Config: min-confidence, model-filter, after/before date filters
4. Test: 9 smoke tests covering conversion, filtering, and end-to-end run

Closes #199
175 lines
5.2 KiB
Python
175 lines
5.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Smoke tests for knowledge_to_training_pairs.py
|
|
|
|
Tests:
|
|
- Output is valid JSONL
|
|
- Each line has required fields (terse, rich, domain, source_confidence, source_model)
|
|
- Confidence values are in [0,1]
|
|
- Terse is non-empty and reasonably short (< 200 chars)
|
|
- Rich matches the original fact
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
import os
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
# Add scripts dir to path for imports.
# The test file's path is resolved first so SCRIPT_DIR is absolute even when
# __file__ is a bare relative filename; without that, ".parent.parent" would
# collapse to "." and point at the wrong directory.
SCRIPT_DIR = Path(__file__).resolve().parent.parent / "scripts"
sys.path.insert(0, str(SCRIPT_DIR))
|
|
|
|
from knowledge_to_training_pairs import (
|
|
fact_to_terse,
|
|
filter_entries,
|
|
entry_to_pair,
|
|
parse_date,
|
|
)
|
|
|
|
|
|
def test_fact_to_terse_pitfall():
    """A 'pitfall' fact is turned into a short 'How do I ...?' question."""
    question = fact_to_terse(
        "deploy-crons.py leaves jobs in mixed model format",
        "pitfall",
        "hermes-agent",
    )

    assert question.startswith("How do I")
    assert "?" in question
    assert len(question) < 150
    print("PASS: test_fact_to_terse_pitfall")
|
|
|
|
|
|
def test_fact_to_terse_fact():
    """A 'fact' entry is turned into a 'What should I know about ...?' question."""
    question = fact_to_terse(
        "Python is a high-level programming language",
        "fact",
        "global",
    )

    assert question.startswith("What should I know about")
    assert "?" in question
    print("PASS: test_fact_to_terse_fact")
|
|
|
|
|
|
def test_fact_to_terse_pattern():
    """A 'pattern' entry yields a question asking for the recommended/best way."""
    question = fact_to_terse("Use sparse checkout for large repos", "pattern", "devops")

    accepted_phrases = ("recommended way", "best way")
    assert any(phrase in question for phrase in accepted_phrases)
    print("PASS: test_fact_to_terse_pattern")
|
|
|
|
|
|
def test_entry_to_pair_structure():
    """entry_to_pair returns a dict carrying all required training-pair fields."""
    knowledge_entry = dict(
        id="test:001",
        fact="Test fact text.",
        category="fact",
        domain="test-domain",
        confidence=0.85,
        model="test-model",
    )

    pair = entry_to_pair(knowledge_entry)
    assert pair is not None

    # Every output record must expose these keys.
    for required_key in ("terse", "rich", "domain", "source_confidence", "source_model"):
        assert required_key in pair

    # The rich side is the original fact; the domain is carried over as-is.
    assert pair["rich"] == "Test fact text."
    assert pair["domain"] == "test-domain"
    assert 0.0 <= pair["source_confidence"] <= 1.0
    print("PASS: test_entry_to_pair_structure")
|
|
|
|
|
|
def test_filter_by_confidence():
    """Entries below min_confidence are dropped by filter_entries."""
    scored = (("A", 0.9), ("B", 0.4), ("C", 0.6))
    entries = [{"fact": name, "confidence": score} for name, score in scored]

    kept = filter_entries(entries, min_confidence=0.5)

    assert len(kept) == 2
    for entry in kept:
        assert entry["confidence"] >= 0.5
    print("PASS: test_filter_by_confidence")
|
|
|
|
|
|
def test_filter_by_model():
    """Only entries whose model is listed in model_filter survive filtering."""
    models_by_fact = (("A", "claude-sonnet"), ("B", "gpt-4"), ("C", "unknown"))
    entries = [{"fact": name, "model": model} for name, model in models_by_fact]

    kept = filter_entries(entries, model_filter=["claude-sonnet", "gpt-4"])

    assert len(kept) == 2
    for entry in kept:
        assert entry["model"] in ("claude-sonnet", "gpt-4")
    print("PASS: test_filter_by_model")
|
|
|
|
|
|
def test_filter_by_date():
    """Filtering with an 'after' cutoff keeps two of the three dated entries."""
    cutoff = parse_date("2026-04-01")
    dated_entries = [
        {"fact": "A", "last_confirmed": "2026-04-10"},
        {"fact": "B", "last_confirmed": "2026-03-01"},
        # This entry carries only a first_seen date, no last_confirmed.
        {"fact": "C", "first_seen": "2026-04-15"},
    ]

    kept = filter_entries(dated_entries, after=cutoff)

    assert len(kept) == 2
    print("PASS: test_filter_by_date")
|
|
|
|
|
|
def test_end_to_end_jsonl_output():
    """Integration test: run the script with --dry-run and check its stats.

    The script is launched from the repository root (the parent of
    SCRIPT_DIR). It must exit with status 0 and write a JSON stats object to
    stderr whose ``input_entries`` and ``pairs_generated`` counts are both
    positive.
    """
    import subprocess

    repo_dir = SCRIPT_DIR.parent
    result = subprocess.run(
        # Use the interpreter running this test rather than whatever "python3"
        # resolves to on PATH, which may be a different environment or absent.
        [sys.executable, "scripts/knowledge_to_training_pairs.py", "--dry-run"],
        capture_output=True,
        text=True,
        cwd=repo_dir,
    )
    # Surface the script's own error output when it fails.
    assert result.returncode == 0, (
        f"script exited with {result.returncode}; stderr:\n{result.stderr}"
    )
    stderr = result.stderr.strip()

    # The stats object starts at the first '{' in stderr.
    start = stderr.find('{')
    assert start >= 0, "Stats JSON not found in stderr"

    # raw_decode parses exactly one JSON value and ignores any trailing text.
    # Unlike manual brace counting it is not confused by '{' or '}' characters
    # inside JSON string values.
    try:
        stats, _ = json.JSONDecoder().raw_decode(stderr[start:])
    except json.JSONDecodeError as exc:
        raise AssertionError(f"Unterminated JSON in stderr: {exc}") from exc

    assert stats["input_entries"] > 0
    assert stats["pairs_generated"] > 0
    print("PASS: test_end_to_end_jsonl_output")
|
|
|
|
|
|
def test_terse_length_constraint():
    """Terse should be non-empty and reasonably short (< 200 chars) for training."""
    # Sample facts from actual knowledge
    test_facts = [
        ("deploy-crons.py leaves jobs in mixed model format", "pitfall", "hermes-agent"),
        ("Cron jobs with blank fallback_model fields trigger warnings", "pitfall", "hermes-agent"),
        ("Use the Gitea REST API when clone times out", "pattern", "devops"),
    ]
    for fact, cat, domain in test_facts:
        terse = fact_to_terse(fact, cat, domain)
        # The module docstring promises a non-empty terse; an empty string
        # would otherwise satisfy the length bound below.
        assert terse, f"Terse is empty for fact: {fact}"
        assert len(terse) < 200, f"Terse too long ({len(terse)}): {terse}"
    print("PASS: test_terse_length_constraint")
|
|
|
|
|
|
if __name__ == "__main__":
    # Allow running the smoke tests directly, without a test runner. The tests
    # execute in declaration order and the first failing assertion aborts.
    for smoke_test in (
        test_fact_to_terse_pitfall,
        test_fact_to_terse_fact,
        test_fact_to_terse_pattern,
        test_entry_to_pair_structure,
        test_filter_by_confidence,
        test_filter_by_model,
        test_filter_by_date,
        test_end_to_end_jsonl_output,
        test_terse_length_constraint,
    ):
        smoke_test()
    print("\nAll smoke tests passed.")
|