#!/usr/bin/env python3
"""
TurboQuant Server Manager

Manages llama-server lifecycle for integration tests:
- Start server with TurboQuant flags
- Wait for health check
- Stop server on teardown

Usage:
    from tests.server_manager import TurboQuantServer

    with TurboQuantServer(model_path="/path/to/model.gguf") as server:
        url = server.url  # e.g. http://127.0.0.1:8081
        # Run tests against server
"""

import http.client
import json
import os
import shutil
import signal
import subprocess
import sys
import tempfile
import time
import urllib.request
import urllib.error
from pathlib import Path
from typing import IO, List, Optional

# Executable name looked up in the fixed install locations and on PATH.
_BINARY_NAME = "llama-server"

# How much of the captured stderr is quoted in the "exited early" error.
_LOG_TAIL_BYTES = 4096
_LOG_TAIL_CHARS = 500


class TurboQuantServer:
    """Context manager for llama-server with TurboQuant.

    Entering the context starts the server and blocks until its
    ``/v1/models`` endpoint lists at least one model; leaving the context
    terminates the process.

    Args:
        model_path: Path to the model file passed to the server via ``-m``.
        port: TCP port passed via ``--port``.
        kv_type: Value passed to both ``-ctk`` and ``-ctv``.
        context_size: Value passed via ``-c``.
        server_binary: Explicit path to the server executable. When omitted,
            ``find_server_binary()`` is used to locate one.
        timeout: Seconds to wait for the server to become healthy.
        host: Address passed via ``--host`` and used to build ``url``.
    """

    # Class-level defaults keep stop() safe on partially initialised
    # instances (e.g. when a subclass overrides __init__).
    process: Optional[subprocess.Popen] = None
    _log_file: Optional[IO[bytes]] = None

    def __init__(
        self,
        model_path: str,
        port: int = 8081,
        kv_type: str = "turbo4",
        context_size: int = 32768,
        server_binary: Optional[str] = None,
        timeout: float = 60.0,
        host: str = "127.0.0.1",
    ):
        self.model_path = model_path
        self.port = port
        self.kv_type = kv_type
        self.context_size = context_size
        self.timeout = timeout
        self.host = host

        # Discovery is shared with the module-level helper so both code paths
        # agree on the search order and never execute a candidate just to
        # probe it.  A missing binary is reported lazily by start().
        self.server_binary: Optional[str] = server_binary or find_server_binary()

        self.process = None
        self._log_file = None

    @property
    def url(self) -> str:
        """Base URL of the server, built from ``host`` and ``port``."""
        return f"http://{self.host}:{self.port}"

    def _build_command(self) -> List[str]:
        """Return the argv list used to launch the server."""
        return [
            self.server_binary,
            "-m", self.model_path,
            "--port", str(self.port),
            "--host", self.host,
            "-ctk", self.kv_type,
            "-ctv", self.kv_type,
            "-c", str(self.context_size),
        ]

    def _check_health(self) -> bool:
        """Return True when ``/v1/models`` answers with a non-empty model list.

        Connection failures, HTTP errors and malformed responses all count as
        "not healthy yet" and yield False.
        """
        try:
            # The context manager closes the response even if parsing fails.
            with urllib.request.urlopen(f"{self.url}/v1/models", timeout=5) as resp:
                payload = json.loads(resp.read())
        except (OSError, ValueError, http.client.HTTPException):
            # OSError covers URLError/HTTPError/timeouts; ValueError covers
            # invalid JSON or undecodable bytes.
            return False

        models = payload.get("data") if isinstance(payload, dict) else None
        return isinstance(models, list) and len(models) > 0

    def _read_log_tail(self) -> str:
        """Return the last part of the captured server stderr.

        Only call this after the child has exited: the child shares the file
        offset of the log file, so seeking while it is still writing would
        disturb its output position.
        """
        log = self._log_file
        if log is None:
            return ""
        try:
            log.seek(0, os.SEEK_END)
            log.seek(max(0, log.tell() - _LOG_TAIL_BYTES))
            raw = log.read()
        except (OSError, ValueError):
            return ""
        # errors="replace" so odd bytes in the log never mask the real error.
        return raw.decode("utf-8", errors="replace")[-_LOG_TAIL_CHARS:]

    def _wait_until_healthy(self) -> None:
        """Poll the server until it is healthy, exits, or the timeout elapses."""
        # monotonic() is unaffected by wall-clock adjustments.
        deadline = time.monotonic() + self.timeout
        while time.monotonic() < deadline:
            returncode = self.process.poll()
            if returncode is not None:
                raise RuntimeError(
                    f"Server exited early (code {returncode}): {self._read_log_tail()}"
                )
            if self._check_health():
                return
            time.sleep(1.0)
        raise TimeoutError(f"Server did not become healthy within {self.timeout}s")

    def start(self) -> str:
        """Start the server and wait for it to be healthy. Returns the server URL.

        Calling start() while the server is already running is a no-op that
        returns the URL.

        Raises:
            RuntimeError: No server binary is known, or the server process
                exited before becoming healthy (the message quotes the end
                of its stderr).
            FileNotFoundError: ``model_path`` does not exist.
            TimeoutError: The server did not become healthy within ``timeout``.
        """
        if self.process is not None and self.process.poll() is None:
            return self.url

        if not self.server_binary:
            raise RuntimeError(
                "llama-server binary not found. Set server_binary or install to standard location."
            )
        if not Path(self.model_path).exists():
            raise FileNotFoundError(f"Model not found: {self.model_path}")

        # Release whatever a previous, now-dead run left behind.
        self.stop()

        # Set TurboQuant env
        env = os.environ.copy()
        env["TURBO_LAYER_ADAPTIVE"] = "7"

        # stderr goes to a temp file rather than a pipe: nothing drains the
        # pipe while the server runs, so a full pipe buffer would block it.
        self._log_file = tempfile.TemporaryFile()
        try:
            self.process = subprocess.Popen(
                self._build_command(),
                stdout=subprocess.DEVNULL,
                stderr=self._log_file,
                env=env,
            )
            self._wait_until_healthy()
        except BaseException:
            # Includes KeyboardInterrupt: __exit__ never runs when __enter__
            # fails, so the child must be cleaned up here.
            self.stop()
            raise
        return self.url

    def stop(self):
        """Stop the server.

        Sends SIGTERM, escalates to kill after 10 seconds, and releases the
        captured-stderr file.  Safe to call repeatedly or when the server was
        never started.
        """
        proc, self.process = self.process, None
        if proc is not None and proc.poll() is None:
            try:
                proc.send_signal(signal.SIGTERM)
                try:
                    proc.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    proc.kill()
                    proc.wait(timeout=5)
            except (OSError, subprocess.TimeoutExpired):
                # Teardown is best-effort; there is nothing more to try.
                pass

        log, self._log_file = self._log_file, None
        if log is not None:
            log.close()

    def __enter__(self) -> "TurboQuantServer":
        self.start()
        return self

    def __exit__(self, *args):
        self.stop()


def find_server_binary() -> Optional[str]:
    """Find llama-server binary in common locations.

    Checks the fixed install locations first, then PATH.

    Returns:
        Path to an executable llama-server, or None when none is found.
    """
    candidates = [
        Path.home() / "llama-cpp-turboquant" / "build" / "bin" / _BINARY_NAME,
        Path("/opt/llama-cpp-turboquant/build/bin") / _BINARY_NAME,
    ]
    for candidate in candidates:
        if candidate.is_file() and os.access(candidate, os.X_OK):
            return str(candidate)

    # Portable PATH lookup without spawning the external `which` command.
    return shutil.which(_BINARY_NAME)


def find_model(model_dir: Optional[str] = None) -> Optional[str]:
    """Find a GGUF model file.

    Search order: ``model_dir``, the ``TURBOQUANT_MODEL_DIR`` environment
    variable, ``~/models``, ``/opt/models``, ``/tmp/models``.  An entry may be
    a ``.gguf`` file itself or a directory that is searched recursively; in a
    directory the first ``.gguf`` file in sorted path order wins.

    Returns:
        Path of the first model found, or None.
    """
    search_dirs = [
        model_dir,
        os.environ.get("TURBOQUANT_MODEL_DIR"),
        str(Path.home() / "models"),
        "/opt/models",
        "/tmp/models",
    ]
    for entry in search_dirs:
        if not entry:
            continue
        path = Path(entry)
        if path.is_file():
            if path.suffix == ".gguf":
                return str(path)
            continue
        if path.is_dir():
            # min() gives the same result as sorted(...)[0] without sorting
            # the whole listing; directories named *.gguf are skipped.
            first = min(
                (f for f in path.rglob("*.gguf") if f.is_file()),
                default=None,
            )
            if first is not None:
                return str(first)
    return None