# forked from Rockachopa/Timmy-time-dashboard
"""Unit tests for scripts/export_trajectories.py.
|
|
|
|
Tests trajectory conversion logic — no I/O, no Ollama, no mlx.
|
|
"""
|
|
|
|
from __future__ import annotations

import json
from pathlib import Path

import pytest

import scripts.export_trajectories as et


# ── Fixtures ──────────────────────────────────────────────────────────────────


@pytest.fixture()
def simple_session(tmp_path: Path) -> Path:
    """Create a logs dir containing one two-turn session JSONL file."""
    log_root = tmp_path / "logs"
    log_root.mkdir()
    records = [
        {"type": "message", "role": "user", "content": "What time is it?", "timestamp": "2026-03-01T10:00:00"},
        {"type": "message", "role": "timmy", "content": "It is 10:00 AM.", "timestamp": "2026-03-01T10:00:01"},
        {"type": "message", "role": "user", "content": "Thanks!", "timestamp": "2026-03-01T10:00:05"},
        {"type": "message", "role": "timmy", "content": "You're welcome!", "timestamp": "2026-03-01T10:00:06"},
    ]
    lines = [json.dumps(record) for record in records]
    (log_root / "session_2026-03-01.jsonl").write_text("\n".join(lines) + "\n")
    return log_root


@pytest.fixture()
def tool_call_session(tmp_path: Path) -> Path:
    """Create a logs dir whose single session includes a tool-call entry."""
    log_root = tmp_path / "logs"
    log_root.mkdir()
    records = [
        {"type": "message", "role": "user", "content": "Read CLAUDE.md", "timestamp": "2026-03-01T10:00:00"},
        {
            "type": "tool_call",
            "tool": "read_file",
            "args": {"path": "CLAUDE.md"},
            "result": "# CLAUDE.md content here",
            "timestamp": "2026-03-01T10:00:01",
        },
        {"type": "message", "role": "timmy", "content": "Here is the content.", "timestamp": "2026-03-01T10:00:02"},
    ]
    lines = [json.dumps(record) for record in records]
    (log_root / "session_2026-03-01.jsonl").write_text("\n".join(lines) + "\n")
    return log_root


# ── _load_entries ─────────────────────────────────────────────────────────────


@pytest.mark.unit
def test_load_entries_returns_all(simple_session: Path) -> None:
    """All four well-formed entries in the session file are loaded."""
    loaded = et._load_entries(simple_session)
    assert len(loaded) == 4


@pytest.mark.unit
def test_load_entries_skips_malformed(tmp_path: Path) -> None:
    """A line that is not valid JSON is dropped; the rest still load."""
    log_root = tmp_path / "logs"
    log_root.mkdir()
    raw_lines = [
        '{"type": "message", "role": "user", "content": "hi"}',
        "NOT_JSON",
        '{"type": "message", "role": "timmy", "content": "hello"}',
    ]
    (log_root / "session_2026-03-01.jsonl").write_text("\n".join(raw_lines) + "\n")
    loaded = et._load_entries(log_root)
    assert len(loaded) == 2  # malformed line skipped


@pytest.mark.unit
def test_load_entries_empty_dir(tmp_path: Path) -> None:
    """A logs directory with no session files yields an empty list."""
    log_root = tmp_path / "logs"
    log_root.mkdir()
    assert et._load_entries(log_root) == []


@pytest.mark.unit
def test_load_entries_multiple_files(tmp_path: Path) -> None:
    """Entries from every session file in the directory are combined."""
    log_root = tmp_path / "logs"
    log_root.mkdir()
    for day in ("2026-03-01", "2026-03-02"):
        payload = json.dumps({"type": "message", "role": "user", "content": f"day {day}"})
        (log_root / f"session_{day}.jsonl").write_text(payload + "\n")
    loaded = et._load_entries(log_root)
    assert len(loaded) == 2


# ── _format_tool_call ─────────────────────────────────────────────────────────


@pytest.mark.unit
def test_format_tool_call_structure() -> None:
    """Formatted call is a JSON payload wrapped in <tool_call> tags."""
    rendered = et._format_tool_call(
        {
            "type": "tool_call",
            "tool": "read_file",
            "args": {"path": "/tmp/foo.txt"},
            "result": "file contents",
        }
    )
    assert rendered.startswith("<tool_call>")
    assert rendered.endswith("</tool_call>")
    # The JSON payload sits on the second line, between the tags.
    payload = json.loads(rendered.split("\n")[1])
    assert payload["name"] == "read_file"
    assert payload["arguments"]["path"] == "/tmp/foo.txt"


@pytest.mark.unit
def test_format_tool_call_missing_tool() -> None:
    """An entry with no 'tool' key falls back to the name 'unknown'."""
    rendered = et._format_tool_call({"type": "tool_call", "args": {}})
    assert "unknown" in rendered


# ── _group_into_turns ─────────────────────────────────────────────────────────


@pytest.mark.unit
def test_group_basic_conversation() -> None:
    """Alternating user/timmy messages pair up into one turn each."""
    conversation = [
        {"type": "message", "role": "user", "content": "hello"},
        {"type": "message", "role": "timmy", "content": "hi there"},
        {"type": "message", "role": "user", "content": "bye"},
        {"type": "message", "role": "timmy", "content": "goodbye"},
    ]
    turns = et._group_into_turns(conversation)
    assert len(turns) == 2
    first, second = turns
    assert (first["user"], first["assistant"]) == ("hello", "hi there")
    assert (second["user"], second["assistant"]) == ("bye", "goodbye")


@pytest.mark.unit
def test_group_with_tool_call() -> None:
    """A tool call between user and timmy folds into the assistant side."""
    conversation = [
        {"type": "message", "role": "user", "content": "check the file"},
        {"type": "tool_call", "tool": "read_file", "args": {"path": "x"}, "result": "content"},
        {"type": "message", "role": "timmy", "content": "Done."},
    ]
    turns = et._group_into_turns(conversation)
    assert len(turns) == 1
    assistant_side = turns[0]["assistant"]
    assert "<tool_call>" in assistant_side
    assert "Done." in assistant_side


@pytest.mark.unit
def test_group_skips_user_without_response() -> None:
    """User message with no timmy response should not create a turn."""
    conversation = [
        {"type": "message", "role": "user", "content": "hello"},
        # No timmy response
        {"type": "message", "role": "user", "content": "are you there?"},
        {"type": "message", "role": "timmy", "content": "Yes!"},
    ]
    turns = et._group_into_turns(conversation)
    assert len(turns) == 1
    assert turns[0]["user"] == "are you there?"


@pytest.mark.unit
def test_group_ignores_errors_and_decisions() -> None:
    """Entries of type 'error' and 'decision' never leak into a turn."""
    conversation = [
        {"type": "message", "role": "user", "content": "hello"},
        {"type": "error", "error": "something failed"},
        {"type": "decision", "decision": "retry"},
        {"type": "message", "role": "timmy", "content": "Got it."},
    ]
    turns = et._group_into_turns(conversation)
    assert len(turns) == 1
    assistant_side = turns[0]["assistant"]
    assert "error" not in assistant_side
    assert "retry" not in assistant_side


@pytest.mark.unit
def test_group_empty_entries() -> None:
    """No entries means no turns."""
    assert et._group_into_turns([]) == []


# ── turns_to_training_examples ────────────────────────────────────────────────


@pytest.mark.unit
def test_training_examples_structure() -> None:
    """Each turn becomes a system/user/assistant chat-format example."""
    examples = et.turns_to_training_examples(
        [{"user": "hello", "assistant": "hi there, how can I help?"}]
    )
    assert len(examples) == 1
    system_msg, user_msg, assistant_msg = examples[0]["messages"]
    assert system_msg["role"] == "system"
    assert user_msg["role"] == "user"
    assert user_msg["content"] == "hello"
    assert assistant_msg["role"] == "assistant"
    assert assistant_msg["content"] == "hi there, how can I help?"


@pytest.mark.unit
def test_training_examples_filters_short_responses() -> None:
    """Assistant replies shorter than min_assistant_len are dropped."""
    candidate_turns = [
        {"user": "hello", "assistant": "ok"},  # too short
        {"user": "hello", "assistant": "This is a longer response that passes."},
    ]
    examples = et.turns_to_training_examples(candidate_turns, min_assistant_len=10)
    assert len(examples) == 1
    assert examples[0]["messages"][2]["content"] == "This is a longer response that passes."


@pytest.mark.unit
def test_training_examples_filters_empty_user() -> None:
    """A turn whose user side is empty produces no example."""
    examples = et.turns_to_training_examples([{"user": "", "assistant": "some response here"}])
    assert len(examples) == 0


@pytest.mark.unit
def test_training_examples_uses_custom_system_prompt() -> None:
    """An explicit system_prompt overrides the default system message."""
    examples = et.turns_to_training_examples(
        [{"user": "hi", "assistant": "hello there!"}],
        system_prompt="Custom prompt.",
    )
    assert examples[0]["messages"][0]["content"] == "Custom prompt."


# ── export_training_data (integration-style, uses tmp_path) ──────────────────


@pytest.mark.unit
def test_export_training_data_writes_jsonl(simple_session: Path, tmp_path: Path) -> None:
    """Both turns of the simple session are exported as chat-format JSONL."""
    target = tmp_path / "train.jsonl"
    written = et.export_training_data(logs_dir=simple_session, output_path=target)
    assert written == 2
    assert target.exists()
    records = [json.loads(raw) for raw in target.read_text().splitlines() if raw.strip()]
    assert len(records) == 2
    for record in records:
        assert "messages" in record
        assert [msg["role"] for msg in record["messages"]] == ["system", "user", "assistant"]


@pytest.mark.unit
def test_export_training_data_with_tool_calls(tool_call_session: Path, tmp_path: Path) -> None:
    """The exported assistant message embeds the formatted tool call."""
    target = tmp_path / "train.jsonl"
    written = et.export_training_data(logs_dir=tool_call_session, output_path=target)
    assert written == 1
    record = json.loads(target.read_text().strip())
    assistant_content = record["messages"][2]["content"]
    assert "<tool_call>" in assistant_content
    assert "read_file" in assistant_content


@pytest.mark.unit
def test_export_training_data_returns_zero_for_empty_logs(tmp_path: Path) -> None:
    """An empty logs dir exports nothing and writes no output file."""
    log_root = tmp_path / "logs"
    log_root.mkdir()
    target = tmp_path / "train.jsonl"
    written = et.export_training_data(logs_dir=log_root, output_path=target)
    assert written == 0
    assert not target.exists()


# ── CLI ───────────────────────────────────────────────────────────────────────


@pytest.mark.unit
def test_cli_missing_logs_dir(tmp_path: Path) -> None:
    """A nonexistent --logs-dir makes main() exit with code 1."""
    exit_code = et.main(
        ["--logs-dir", str(tmp_path / "nonexistent"), "--output", str(tmp_path / "out.jsonl")]
    )
    assert exit_code == 1


@pytest.mark.unit
def test_cli_exports_and_returns_zero(simple_session: Path, tmp_path: Path) -> None:
    """A valid logs dir exports successfully and main() returns 0."""
    target = tmp_path / "out.jsonl"
    argv = ["--logs-dir", str(simple_session), "--output", str(target)]
    exit_code = et.main(argv)
    assert exit_code == 0
    assert target.exists()