"""Pytest configuration for turboquant."""
|
|
import os
|
|
import sys
|
|
import pytest
|
|
from pathlib import Path
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
|
|
|
|
|
@pytest.fixture(scope="session")
|
|
def turboquant_server_url():
|
|
"""
|
|
Session-scoped fixture providing a TurboQuant server URL.
|
|
|
|
If TURBOQUANT_SERVER_URL is set, uses that directly.
|
|
Otherwise, auto-starts a llama-server with TurboQuant flags.
|
|
|
|
Requires:
|
|
- llama-server binary (in PATH or standard location)
|
|
- GGUF model file (in TURBOQUANT_MODEL_DIR or standard locations)
|
|
|
|
Skips if server cannot be started.
|
|
"""
|
|
# If URL already provided, use it
|
|
if os.environ.get("TURBOQUANT_SERVER_URL"):
|
|
yield os.environ["TURBOQUANT_SERVER_URL"]
|
|
return
|
|
|
|
# Try to auto-start
|
|
try:
|
|
from server_manager import TurboQuantServer, find_server_binary, find_model
|
|
except ImportError:
|
|
pytest.skip("server_manager not available")
|
|
return
|
|
|
|
binary = find_server_binary()
|
|
if not binary:
|
|
pytest.skip("llama-server binary not found — install llama-cpp-turboquant")
|
|
return
|
|
|
|
model = find_model()
|
|
if not model:
|
|
pytest.skip("No GGUF model found — set TURBOQUANT_MODEL_DIR or place model in ~/models")
|
|
return
|
|
|
|
port = int(os.environ.get("TURBOQUANT_TEST_PORT", "18081"))
|
|
kv_type = os.environ.get("TURBOQUANT_KV_TYPE", "turbo4")
|
|
ctx_size = int(os.environ.get("TURBOQUANT_CTX_SIZE", "8192"))
|
|
timeout = float(os.environ.get("TURBOQUANT_STARTUP_TIMEOUT", "60"))
|
|
|
|
server = TurboQuantServer(
|
|
model_path=model,
|
|
port=port,
|
|
kv_type=kv_type,
|
|
context_size=ctx_size,
|
|
server_binary=binary,
|
|
timeout=timeout,
|
|
)
|
|
|
|
try:
|
|
url = server.start()
|
|
yield url
|
|
except Exception as e:
|
|
pytest.skip(f"Could not start TurboQuant server: {e}")
|
|
finally:
|
|
server.stop()
|
|
|
|
|
|
@pytest.fixture(scope="session")
|
|
def turboquant_model_name(turboquant_server_url):
|
|
"""Get the model name from the running server."""
|
|
import json
|
|
import urllib.request
|
|
|
|
try:
|
|
req = urllib.request.Request(f"{turboquant_server_url}/v1/models")
|
|
resp = urllib.request.urlopen(req, timeout=10)
|
|
data = json.loads(resp.read())
|
|
models = data.get("data", [])
|
|
if models:
|
|
return models[0].get("id", "unknown")
|
|
except Exception:
|
|
pass
|
|
return "gemma-4"
|