compounding-intelligence/tests/test_knowledge_to_training_pairs.py

#!/usr/bin/env python3
"""
Smoke tests for knowledge_to_training_pairs.py

Tests:
  - Output is valid JSONL
  - Each line has required fields (terse, rich, domain, source_confidence, source_model)
  - Confidence values are in [0,1]
  - Terse is non-empty and reasonably short (< 200 chars)
  - Rich matches the original fact
"""

import json
import sys
import os
import tempfile
from pathlib import Path

# Make the sibling "scripts" directory (two levels up from this file, then
# into "scripts") importable, so the module under test can be imported below.
# It is inserted at index 0 so it is searched before other sys.path entries.
SCRIPT_DIR = Path(__file__).parent.parent / "scripts"
sys.path.insert(0, str(SCRIPT_DIR))

from knowledge_to_training_pairs import (
    fact_to_terse,
    filter_entries,
    entry_to_pair,
    parse_date,
)


def test_fact_to_terse_pitfall():
    """Check that a pitfall fact yields a short "How do I" question."""
    question = fact_to_terse(
        "deploy-crons.py leaves jobs in mixed model format",
        "pitfall",
        "hermes-agent",
    )
    # The prompt must open with the expected phrase, be phrased as a
    # question, and stay under 150 characters.
    assert question.startswith("How do I")
    assert "?" in question
    assert len(question) < 150
    print("PASS: test_fact_to_terse_pitfall")


def test_fact_to_terse_fact():
    """Check that a plain fact yields a "What should I know about" question."""
    question = fact_to_terse(
        "Python is a high-level programming language",
        "fact",
        "global",
    )
    assert question.startswith("What should I know about")
    assert "?" in question
    print("PASS: test_fact_to_terse_fact")


def test_fact_to_terse_pattern():
    """Check that a pattern fact is phrased as a "recommended/best way" prompt."""
    question = fact_to_terse("Use sparse checkout for large repos", "pattern", "devops")
    # Either wording is acceptable.
    accepted_phrases = ("recommended way", "best way")
    assert any(phrase in question for phrase in accepted_phrases)
    print("PASS: test_fact_to_terse_pattern")


def test_entry_to_pair_structure():
    """Check that entry_to_pair returns a dict carrying every required field.

    Also verifies that the rich text and domain are copied from the entry and
    that the reported source confidence lies within [0, 1].
    """
    sample_entry = {
        "id": "test:001",
        "fact": "Test fact text.",
        "category": "fact",
        "domain": "test-domain",
        "confidence": 0.85,
        "model": "test-model",
    }
    result = entry_to_pair(sample_entry)
    assert result is not None

    # Every training pair must expose these keys.
    required_fields = ("terse", "rich", "domain", "source_confidence", "source_model")
    for field in required_fields:
        assert field in result

    assert result["rich"] == "Test fact text."
    assert result["domain"] == "test-domain"
    confidence = result["source_confidence"]
    assert 0.0 <= confidence <= 1.0
    print("PASS: test_entry_to_pair_structure")


def test_filter_by_confidence():
    """Check that entries below the minimum confidence are dropped."""
    scored_facts = (("A", 0.9), ("B", 0.4), ("C", 0.6))
    records = [{"fact": name, "confidence": score} for name, score in scored_facts]

    kept = filter_entries(records, min_confidence=0.5)

    # Only "A" (0.9) and "C" (0.6) clear the 0.5 threshold.
    assert len(kept) == 2
    for record in kept:
        assert record["confidence"] >= 0.5
    print("PASS: test_filter_by_confidence")


def test_filter_by_model():
    """Check that only entries from the requested models are kept."""
    wanted_models = ("claude-sonnet", "gpt-4")
    records = [
        {"fact": name, "model": model}
        for name, model in (("A", "claude-sonnet"), ("B", "gpt-4"), ("C", "unknown"))
    ]

    # filter_entries receives a list, matching how the filter is invoked here.
    kept = filter_entries(records, model_filter=list(wanted_models))

    assert len(kept) == 2
    for record in kept:
        assert record["model"] in wanted_models
    print("PASS: test_filter_by_model")


def test_filter_by_date():
    """Check that filtering with an `after` cutoff keeps two of three entries.

    Two entries carry "last_confirmed" and one carries only "first_seen";
    exactly one of the dates ("2026-03-01") precedes the cutoff.
    """
    cutoff = parse_date("2026-04-01")
    records = [
        {"fact": "A", "last_confirmed": "2026-04-10"},
        {"fact": "B", "last_confirmed": "2026-03-01"},
        {"fact": "C", "first_seen": "2026-04-15"},
    ]
    kept = filter_entries(records, after=cutoff)
    assert len(kept) == 2
    print("PASS: test_filter_by_date")


def test_end_to_end_jsonl_output():
    """Integration test: run the script with --dry-run and check its stats.

    The script is launched from the repository root (the parent of the
    scripts directory) using the same interpreter that runs this test, so the
    check does not depend on a ``python3`` binary being on PATH and honours
    the active virtual environment.

    The first JSON object found in the script's stderr is parsed as the stats
    report; it must record at least one input entry and one generated pair.

    Raises:
        AssertionError: if the script exits non-zero, if no JSON object is
            found in stderr, if that object cannot be parsed, or if either
            counter is not positive.
    """
    import subprocess

    repo_dir = SCRIPT_DIR.parent
    result = subprocess.run(
        [sys.executable, "scripts/knowledge_to_training_pairs.py", "--dry-run"],
        capture_output=True, text=True, cwd=repo_dir,
    )
    # Include stderr so a failing run is diagnosable from the test output.
    assert result.returncode == 0, (
        f"Script exited with code {result.returncode}; stderr:\n{result.stderr}"
    )
    stderr = result.stderr.strip()

    # The stats JSON object starts at the first '{' in stderr.
    start = stderr.find('{')
    assert start >= 0, "Stats JSON not found in stderr"

    # raw_decode parses one JSON value starting at `start` and ignores any
    # trailing text. Unlike manual brace counting, it handles braces that
    # appear inside JSON string values.
    try:
        stats, _ = json.JSONDecoder().raw_decode(stderr, start)
    except json.JSONDecodeError as exc:
        raise AssertionError(f"Invalid stats JSON in stderr: {exc}") from exc

    assert stats["input_entries"] > 0
    assert stats["pairs_generated"] > 0
    print("PASS: test_end_to_end_jsonl_output")


def test_terse_length_constraint():
    """Check that terse prompts stay under 200 characters for sample facts."""
    # Each sample is (fact, category, domain).
    samples = (
        ("deploy-crons.py leaves jobs in mixed model format", "pitfall", "hermes-agent"),
        ("Cron jobs with blank fallback_model fields trigger warnings", "pitfall", "hermes-agent"),
        ("Use the Gitea REST API when clone times out", "pattern", "devops"),
    )
    for sample in samples:
        terse = fact_to_terse(*sample)
        assert len(terse) < 200, f"Terse too long ({len(terse)}): {terse}"
    print("PASS: test_terse_length_constraint")


if __name__ == "__main__":
    # Run every smoke test in order; the first failing assertion aborts the run.
    smoke_tests = (
        test_fact_to_terse_pitfall,
        test_fact_to_terse_fact,
        test_fact_to_terse_pattern,
        test_entry_to_pair_structure,
        test_filter_by_confidence,
        test_filter_by_model,
        test_filter_by_date,
        test_end_to_end_jsonl_output,
        test_terse_length_constraint,
    )
    for run_test in smoke_tests:
        run_test()
    print("\nAll smoke tests passed.")