# hermes-agent/tests/test_tensorzero_eval_packet.py

from pathlib import Path
import sys

# The "scripts" directory that sits beside this file's own directory:
# parents[0] is the folder containing this file, parents[1] is one level up.
SCRIPT_DIR = Path(__file__).resolve().parents[1] / "scripts"
# Insert at the front of sys.path so the import below is looked up there first.
sys.path.insert(0, str(SCRIPT_DIR))

# NOTE(review): presumably resolves to SCRIPT_DIR/tensorzero_eval_packet.py — confirm.
import tensorzero_eval_packet as tz


def test_scan_touchpoints_finds_expected_matches(tmp_path):
    """Write stub files under ``tmp_path`` and check the labels scan_touchpoints reports."""
    # Relative path -> file body, kept in the order the files are created.
    stub_files = {
        "run_agent.py": "self._fallback_chain = []\n# Provider fallback chain\n",
        "hermes_cli/runtime_provider.py": (
            "def resolve_runtime_provider():\n    return {}\n"
        ),
        "agent/smart_model_routing.py": (
            "def resolve_turn_route(user_message, routing_config, primary):\n    return primary\n"
        ),
        "gateway/run.py": "def _load_provider_routing():\n    return {}\n",
        "cron/scheduler.py": (
            "runtime = resolve_runtime_provider()\nturn_route = resolve_turn_route('x', {}, {})\n"
        ),
        "hermes_state.py": "class SessionDB:\n    pass\n",
        "benchmarks/tool_call_benchmark.py": "class ToolCall: ...\n",
    }
    for relative_name, body in stub_files.items():
        target = tmp_path / relative_name
        # Top-level stubs live directly in tmp_path, which already exists.
        target.parent.mkdir(exist_ok=True)
        target.write_text(body)

    found_labels = {tp.label for tp in tz.scan_touchpoints(tmp_path)}

    expected_labels = (
        "fallback_chain",
        "runtime_provider",
        "smart_model_routing",
        "gateway_provider_routing",
        "cron_runtime_provider",
        "session_db",
        "benchmark_suite",
    )
    for wanted in expected_labels:
        assert wanted in found_labels


def test_build_requirement_matrix_marks_canary_as_gap_without_split_support():
    """Feed nine fixed touchpoints to build_requirement_matrix and check five row statuses.

    ``canary_rollout`` is expected to be ``"gap"``; the other four checked
    rows are expected to be ``"partial"``.
    """
    # (label, file_path, line_number, matched_text)
    touchpoint_specs = [
        ("runtime_provider", "hermes_cli/runtime_provider.py", 10, "def resolve_runtime_provider"),
        ("provider_routing_config", "cli.py", 20, "provider_routing"),
        ("fallback_chain", "run_agent.py", 21, "_fallback_chain = []"),
        ("smart_model_routing", "agent/smart_model_routing.py", 30, "resolve_turn_route"),
        ("gateway_provider_routing", "gateway/run.py", 35, "def _load_provider_routing"),
        ("cron_runtime_provider", "cron/scheduler.py", 36, "runtime = resolve_runtime_provider()"),
        ("session_db", "hermes_state.py", 40, "class SessionDB"),
        ("trajectory_export", "batch_runner.py", 50, "trajectory_entry"),
        ("benchmark_suite", "benchmarks/tool_call_benchmark.py", 60, "ToolCall"),
    ]
    touchpoints = [
        tz.Touchpoint(
            label=label,
            file_path=file_path,
            line_number=line_number,
            matched_text=matched_text,
        )
        for label, file_path, line_number, matched_text in touchpoint_specs
    ]

    rows_by_key = {row.key: row for row in tz.build_requirement_matrix(touchpoints)}

    expected_status = {
        "gateway_replacement": "partial",
        "config_migration": "partial",
        "canary_rollout": "gap",
        "session_feedback": "partial",
        "evaluation_suite": "partial",
    }
    for key, status in expected_status.items():
        assert rows_by_key[key].status == status


def test_build_markdown_renders_recommendation_and_touchpoints():
    """Render markdown from two touchpoints and check headings, labels and file:line refs."""
    runtime_tp = tz.Touchpoint(
        label="runtime_provider",
        file_path="hermes_cli/runtime_provider.py",
        line_number=10,
        matched_text="def resolve_runtime_provider",
    )
    session_tp = tz.Touchpoint(
        label="session_db",
        file_path="hermes_state.py",
        line_number=40,
        matched_text="class SessionDB",
    )
    found = [runtime_tp, session_tp]

    requirement_rows = tz.build_requirement_matrix(found)
    rendered = tz.build_markdown(tz.build_report(found, requirement_rows))

    assert "# TensorZero Evaluation Packet" in rendered
    # human labels, not raw keys
    assert "gateway_replacement" not in rendered
    for fragment in (
        "Gateway replacement scope",
        "Not ready for direct replacement",
        "hermes_cli/runtime_provider.py:10",
        "hermes_state.py:40",
    ):
        assert fragment in rendered


def test_issue_context_is_embedded_in_report():
    """Check that a report built from empty inputs still renders the issue context strings."""
    rendered = tz.build_markdown(tz.build_report([], []))
    lowered = rendered.lower()

    assert "Issue #860" in rendered
    assert "tensorzero" in lowered
    assert "10% traffic" in rendered