"""Pytest configuration for turboquant."""
|
|
import os
|
|
import sys
|
|
import pytest
|
|
from pathlib import Path
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
|
|
|
|
|
@pytest.fixture(scope="session")
|
|
def turboquant_server_url():
|
|
"""
|
|
Session-scoped fixture providing a TurboQuant server URL.
|
|
|
|
If TURBOQUANT_SERVER_URL is set, uses that directly.
|
|
Otherwise, auto-starts a llama-server with TurboQuant flags.
|
|
|
|
Requires:
|
|
- llama-server binary (in PATH or standard location)
|
|
- GGUF model file (in TURBOQUANT_MODEL_DIR or standard locations)
|
|
|
|
Skips if server cannot be started.
|
|
"""
|
|
# If URL already provided, use it
|
|
if os.environ.get("TURBOQUANT_SERVER_URL"):
|
|
yield os.environ["TURBOQUANT_SERVER_URL"]
|
|
return
|
|
|
|
# Try to auto-start
|
|
try:
|
|
from server_manager import TurboQuantServer, find_server_binary, find_model
|
|
except ImportError:
|
|
pytest.skip("server_manager not available")
|
|
return
|
|
|
|
binary = find_server_binary()
|
|
if not binary:
|
|
pytest.skip("llama-server binary not found — install llama-cpp-turboquant")
|
|
return
|
|
|
|
model = find_model()
|
|
if not model:
|
|
pytest.skip("No GGUF model found — set TURBOQUANT_MODEL_DIR or place model in ~/models")
|
|
return
|
|
|
|
port = int(os.environ.get("TURBOQUANT_TEST_PORT", "18081"))
|
|
kv_type = os.environ.get("TURBOQUANT_KV_TYPE", "turbo4")
|
|
ctx_size = int(os.environ.get("TURBOQUANT_CTX_SIZE", "8192"))
|
|
timeout = float(os.environ.get("TURBOQUANT_STARTUP_TIMEOUT", "60"))
|
|
|
|
server = TurboQuantServer(
|
|
model_path=model,
|
|
port=port,
|
|
kv_type=kv_type,
|
|
context_size=ctx_size,
|
|
server_binary=binary,
|
|
timeout=timeout,
|
|
)
|
|
|
|
try:
|
|
url = server.start()
|
|
yield url
|
|
except Exception as e:
|
|
pytest.skip(f"Could not start TurboQuant server: {e}")
|
|
finally:
|
|
server.stop()
|
|
|
|
|
|
@pytest.fixture(scope="session")
|
|
def turboquant_model_name(turboquant_server_url):
|
|
"""Get the model name from the running server."""
|
|
import json
|
|
import urllib.request
|
|
|
|
try:
|
|
req = urllib.request.Request(f"{turboquant_server_url}/v1/models")
|
|
resp = urllib.request.urlopen(req, timeout=10)
|
|
data = json.loads(resp.read())
|
|
models = data.get("data", [])
|
|
if models:
|
|
return models[0].get("id", "unknown")
|
|
except Exception:
|
|
pass
|
|
return "gemma-4"
|