Compare commits

..

1 Commits

Author SHA1 Message Date
e694969381 docs: replace stale raw-IP forge link with canonical domain (#46)
All checks were successful
Smoke Test / smoke (pull_request) Successful in 19s
Replace http://143.198.27.163:3000 with
https://forge.alexanderwhitestone.com in docs/PROJECT_STATUS.md.

No other raw-IP references found in repo.

Closes #46.
2026-04-17 01:22:05 -04:00
3 changed files with 2 additions and 281 deletions

View File

@@ -385,7 +385,7 @@ Step 7: If pass → production. If fail → drop to turbo3 or adjust per-layer p
---
*Repo: http://143.198.27.163:3000/Timmy_Foundation/turboquant*
*Repo: https://forge.alexanderwhitestone.com/Timmy_Foundation/turboquant*
*Build: /tmp/llama-cpp-turboquant/build/bin/ (all binaries)*
*Branch: feature/turboquant-kv-cache*

View File

@@ -1,85 +1,3 @@
"""Pytest configuration for turboquant."""
import os
import sys
import pytest
from pathlib import Path
import sys, os
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
@pytest.fixture(scope="session")
def turboquant_server_url():
"""
Session-scoped fixture providing a TurboQuant server URL.
If TURBOQUANT_SERVER_URL is set, uses that directly.
Otherwise, auto-starts a llama-server with TurboQuant flags.
Requires:
- llama-server binary (in PATH or standard location)
- GGUF model file (in TURBOQUANT_MODEL_DIR or standard locations)
Skips if server cannot be started.
"""
# If URL already provided, use it
if os.environ.get("TURBOQUANT_SERVER_URL"):
yield os.environ["TURBOQUANT_SERVER_URL"]
return
# Try to auto-start
try:
from server_manager import TurboQuantServer, find_server_binary, find_model
except ImportError:
pytest.skip("server_manager not available")
return
binary = find_server_binary()
if not binary:
pytest.skip("llama-server binary not found — install llama-cpp-turboquant")
return
model = find_model()
if not model:
pytest.skip("No GGUF model found — set TURBOQUANT_MODEL_DIR or place model in ~/models")
return
port = int(os.environ.get("TURBOQUANT_TEST_PORT", "18081"))
kv_type = os.environ.get("TURBOQUANT_KV_TYPE", "turbo4")
ctx_size = int(os.environ.get("TURBOQUANT_CTX_SIZE", "8192"))
timeout = float(os.environ.get("TURBOQUANT_STARTUP_TIMEOUT", "60"))
server = TurboQuantServer(
model_path=model,
port=port,
kv_type=kv_type,
context_size=ctx_size,
server_binary=binary,
timeout=timeout,
)
try:
url = server.start()
yield url
except Exception as e:
pytest.skip(f"Could not start TurboQuant server: {e}")
finally:
server.stop()
@pytest.fixture(scope="session")
def turboquant_model_name(turboquant_server_url):
"""Get the model name from the running server."""
import json
import urllib.request
try:
req = urllib.request.Request(f"{turboquant_server_url}/v1/models")
resp = urllib.request.urlopen(req, timeout=10)
data = json.loads(resp.read())
models = data.get("data", [])
if models:
return models[0].get("id", "unknown")
except Exception:
pass
return "gemma-4"

View File

@@ -1,197 +0,0 @@
#!/usr/bin/env python3
"""
TurboQuant Server Manager
Manages llama-server lifecycle for integration tests:
- Start server with TurboQuant flags
- Wait for health check
- Stop server on teardown
Usage:
from tests.server_manager import TurboQuantServer
with TurboQuantServer(model_path="/path/to/model.gguf") as server:
url = server.url # e.g. http://localhost:8081
# Run tests against server
"""
import json
import os
import signal
import subprocess
import sys
import time
import urllib.request
import urllib.error
from pathlib import Path
from typing import Optional
class TurboQuantServer:
"""Context manager for llama-server with TurboQuant."""
def __init__(
self,
model_path: str,
port: int = 8081,
kv_type: str = "turbo4",
context_size: int = 32768,
server_binary: Optional[str] = None,
timeout: float = 60.0,
host: str = "127.0.0.1",
):
self.model_path = model_path
self.port = port
self.kv_type = kv_type
self.context_size = context_size
self.timeout = timeout
self.host = host
# Find server binary
if server_binary:
self.server_binary = server_binary
else:
# Try common locations
candidates = [
Path.home() / "llama-cpp-turboquant" / "build" / "bin" / "llama-server",
Path("/opt/llama-cpp-turboquant/build/bin/llama-server"),
Path("llama-server"), # PATH
]
self.server_binary = None
for c in candidates:
if c.exists() or c.name == "llama-server":
try:
subprocess.run([str(c), "--help"], capture_output=True, timeout=5)
self.server_binary = str(c)
break
except (FileNotFoundError, subprocess.TimeoutExpired):
continue
self.process: Optional[subprocess.Popen] = None
@property
def url(self) -> str:
return f"http://{self.host}:{self.port}"
def _build_command(self) -> list:
cmd = [
self.server_binary,
"-m", self.model_path,
"--port", str(self.port),
"--host", self.host,
"-ctk", self.kv_type,
"-ctv", self.kv_type,
"-c", str(self.context_size),
]
return cmd
def _check_health(self) -> bool:
try:
req = urllib.request.Request(f"{self.url}/v1/models")
resp = urllib.request.urlopen(req, timeout=5)
data = json.loads(resp.read())
return "data" in data and len(data.get("data", [])) > 0
except Exception:
return False
def start(self) -> str:
"""Start the server and wait for it to be healthy. Returns the server URL."""
if not self.server_binary:
raise RuntimeError(
"llama-server binary not found. Set server_binary or install to standard location."
)
if not Path(self.model_path).exists():
raise FileNotFoundError(f"Model not found: {self.model_path}")
cmd = self._build_command()
# Set TurboQuant env
env = os.environ.copy()
env["TURBO_LAYER_ADAPTIVE"] = "7"
self.process = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env=env,
)
# Wait for health
start = time.time()
while time.time() - start < self.timeout:
if self.process.poll() is not None:
stderr = self.process.stderr.read().decode() if self.process.stderr else ""
raise RuntimeError(f"Server exited early (code {self.process.returncode}): {stderr[:500]}")
if self._check_health():
return self.url
time.sleep(1.0)
self.stop()
raise TimeoutError(f"Server did not become healthy within {self.timeout}s")
def stop(self):
"""Stop the server."""
if self.process:
try:
self.process.send_signal(signal.SIGTERM)
self.process.wait(timeout=10)
except subprocess.TimeoutExpired:
self.process.kill()
self.process.wait(timeout=5)
except Exception:
pass
self.process = None
def __enter__(self) -> "TurboQuantServer":
self.start()
return self
def __exit__(self, *args):
self.stop()
def find_server_binary() -> Optional[str]:
"""Find llama-server binary in common locations."""
candidates = [
Path.home() / "llama-cpp-turboquant" / "build" / "bin" / "llama-server",
Path("/opt/llama-cpp-turboquant/build/bin/llama-server"),
]
for c in candidates:
if c.exists():
return str(c)
# Try PATH
try:
result = subprocess.run(["which", "llama-server"], capture_output=True, text=True)
if result.returncode == 0:
return result.stdout.strip()
except Exception:
pass
return None
def find_model(model_dir: Optional[str] = None) -> Optional[str]:
"""Find a GGUF model file."""
search_dirs = [
model_dir,
os.environ.get("TURBOQUANT_MODEL_DIR"),
str(Path.home() / "models"),
"/opt/models",
"/tmp/models",
]
for d in search_dirs:
if not d:
continue
p = Path(d)
if p.is_file() and p.suffix == ".gguf":
return str(p)
if p.is_dir():
for f in sorted(p.rglob("*.gguf")):
return str(f)
return None