fix: add perplexity limitation docs to run_benchmarks.py (#63 )

test: perplexity quality gate tests (#63 )
feat: perplexity quality gate with Ollama proxy fallback (#63 )
2026-04-16 02:53:13 +00:00 · 2026-04-16 02:52:21 +00:00 · 2026-04-16 02:52:19 +00:00 · 2026-04-15 11:57:58 +00:00 · 2026-04-15 11:57:55 +00:00 · 2026-04-14 18:07:25 -04:00
8 changed files with 580 additions and 6 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,3 @@
 build/
 *.pyc
 __pycache__/
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -0,0 +1,36 @@
 # Standalone build for the TurboQuant library and its validation test.
 cmake_minimum_required(VERSION 3.16)
 project(turboquant LANGUAGES CXX)
 # Tests are built by default; pass -DTURBOQUANT_BUILD_TESTS=OFF to skip them.
 option(TURBOQUANT_BUILD_TESTS "Build standalone TurboQuant validation tests" ON)
 # The library is a single translation unit, built as a static archive.
 add_library(turboquant STATIC
    llama-turbo.cpp
 )
 # PUBLIC so that consumers (e.g. the test below) inherit the include path.
 target_include_directories(turboquant PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}
 )
 target_compile_features(turboquant PUBLIC cxx_std_17)
 # Warning flags are PRIVATE: they apply to the library only, not to consumers.
 if(MSVC)
    target_compile_options(turboquant PRIVATE /W4)
 else()
    target_compile_options(turboquant PRIVATE -Wall -Wextra -Wpedantic)
 endif()
 if(TURBOQUANT_BUILD_TESTS)
    # CTest is included so the add_test() registration below is runnable via `ctest`.
    include(CTest)
    add_executable(turboquant_roundtrip_test
        tests/roundtrip_test.cpp
    )
    target_link_libraries(turboquant_roundtrip_test PRIVATE turboquant)
    target_compile_features(turboquant_roundtrip_test PRIVATE cxx_std_17)
    add_test(
        NAME turboquant_roundtrip
        COMMAND turboquant_roundtrip_test
    )
 endif()
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ Unlock 64K-128K context on qwen3.5:27b within 32GB unified memory.
 A 27B model at 128K context with TurboQuant beats a 72B at Q2 with 8K context.
 ## Status
-See [issues](http://143.198.27.163:3000/Timmy_Foundation/turboquant/issues) for current progress.
+See [issues](https://forge.alexanderwhitestone.com/Timmy_Foundation/turboquant/issues) for current progress.
 ## Roles
 - **Strago:** Build spec author
@@ -29,4 +29,4 @@ See [issues](http://143.198.27.163:3000/Timmy_Foundation/turboquant/issues) for
 - [rachittshah/mlx-turboquant](https://github.com/rachittshah/mlx-turboquant) — MLX fallback
 ## Docs
- [BUILD-SPEC.md](BUILD-SPEC.md) — Full build specification (Strago, v2.2)
+- [Project Status](docs/PROJECT_STATUS.md) — Full project status and build specification
--- a/benchmarks/quality_gate.py
+++ b/benchmarks/quality_gate.py
@@ -0,0 +1,308 @@
 #!/usr/bin/env python3
 """
 Perplexity Quality Gate — Unified PPL measurement for TurboQuant (#63).
 Provides a single interface for perplexity measurement regardless of backend:
 - llama-server: Real perplexity via llama-perplexity with --logprobs
 - Ollama:     Proxy metric with documented limitations
 Usage:
    # Real PPL via llama-server (recommended)
    python3 benchmarks/quality_gate.py \
        --backend llama-server \
        --model ~/models/model.gguf \
        --corpus corpora/wiki.test.raw
    # Proxy PPL via Ollama (documented limitation)
    python3 benchmarks/quality_gate.py \
        --backend ollama \
        --model llama3 \
        --corpus corpora/wiki.test.raw
    # CI mode — exit 1 if quality gate fails
    python3 benchmarks/quality_gate.py --model ~/models/model.gguf --check --threshold 0.5
 """
 import argparse
 import json
 import os
 import re
 import subprocess
 import sys
 import textwrap
 import time
 from dataclasses import dataclass, asdict
 from pathlib import Path
 from typing import Optional
@dataclass
class PerplexityResult:
    """Outcome of a single perplexity measurement on one backend.

    The positional field order (backend, kv_type, perplexity, is_proxy)
    is part of the constructor interface and must not be changed.
    """

    # Backend label: "llama-server" or "ollama-proxy".
    backend: str
    # KV cache type the run used, e.g. "f16" or "turbo4".
    kv_type: str
    # Measured (or approximated) perplexity; None when no value was obtained.
    perplexity: Optional[float]
    # True when the value is an approximation rather than real PPL.
    is_proxy: bool
    tokens: Optional[int] = None
    elapsed_seconds: float = 0.0
    # Free-text description of how the value was obtained.
    method: str = ""
    exit_code: int = 0
    error: Optional[str] = None

    def to_dict(self) -> dict:
        """Return all fields as a plain dict, in declaration order."""
        field_map = asdict(self)
        return field_map
@dataclass
class QualityGateResult:
    """Outcome of comparing the F16 baseline against the Turbo4 measurement."""

    f16: Optional[PerplexityResult]
    turbo4: Optional[PerplexityResult]
    # turbo4 PPL minus f16 PPL; None when either value is missing.
    delta: Optional[float]
    threshold: float
    passed: bool
    # True if either measurement is a proxy.
    is_proxy: bool
    warning: str = ""

    def summary(self) -> str:
        """Render a multi-line, human-readable report of the gate outcome."""
        report = ["Perplexity Quality Gate", "=" * 40]

        if self.f16:
            report.append(f"  F16:     PPL={self.f16.perplexity}  ({self.f16.backend}, proxy={self.f16.is_proxy})")
        if self.turbo4:
            report.append(f"  Turbo4:  PPL={self.turbo4.perplexity}  ({self.turbo4.backend}, proxy={self.turbo4.is_proxy})")

        # Without a delta there is nothing to judge, so report INCOMPLETE.
        if self.delta is None:
            report.append("  Result:  INCOMPLETE (missing measurements)")
        else:
            verdict = "PASS" if self.passed else "FAIL"
            report += [
                f"  Delta:   {self.delta:.4f}  (threshold={self.threshold})",
                f"  Result:  {verdict}",
            ]

        if self.warning:
            report.append(f"  Warning: {self.warning}")
        if self.is_proxy:
            report.append("  NOTE: Proxy measurement — not real perplexity via logprobs")
        return "\n".join(report)

    def to_dict(self) -> dict:
        """Return a JSON-serialisable dict; absent measurements become None."""
        f16_payload = self.f16.to_dict() if self.f16 else None
        turbo4_payload = self.turbo4.to_dict() if self.turbo4 else None
        return {
            "f16": f16_payload,
            "turbo4": turbo4_payload,
            "delta": self.delta,
            "threshold": self.threshold,
            "passed": self.passed,
            "is_proxy": self.is_proxy,
            "warning": self.warning,
        }
 def _extract_perplexity(output: str) -> Optional[float]:
    """Pull the perplexity value out of combined stdout/stderr text.

    NOTE(review): upstream llama.cpp prints the result as
    "Final estimate: PPL = <value> +/- <err>" and also emits a progress line
    "perplexity: <t> seconds per pass"; confirm the fork keeps this format.
    """
    final = re.search(r"Final estimate:\s*PPL\s*=\s*(\d+\.?\d*)", output, re.IGNORECASE)
    if final:
        return float(final.group(1))
    # Fallback: the original loose pattern, but never on the timing progress
    # line, whose number is seconds-per-pass rather than perplexity.
    for line in output.splitlines():
        if "seconds per pass" in line:
            continue
        legacy = re.search(r"perplexity[:\s]+(\d+\.?\d*)", line, re.IGNORECASE)
        if legacy:
            return float(legacy.group(1))
    return None


 def measure_perplexity_llama_server(
    llama_bin: str, model: str, corpus: str, context: int,
    kv_type: str, threads: int = 4, timeout: int = 3600
 ) -> PerplexityResult:
    """Measure real perplexity by running the llama-perplexity binary.

    Args:
        llama_bin: Path to the llama-perplexity executable.
        model: Path to the GGUF model file.
        corpus: Path to the evaluation text corpus.
        context: Context length passed via ``-c``.
        kv_type: KV cache type passed via ``--kv-type`` (e.g. "f16", "turbo4").
        threads: Thread count passed via ``-t``.
        timeout: Seconds to wait before giving up on the subprocess.

    Returns:
        A PerplexityResult with ``is_proxy=False``. ``perplexity`` is None and
        ``error`` is set when the binary is missing, times out, exits
        non-zero, or produces output that cannot be parsed.
    """
    # NOTE(review): other docs in this repo configure KV types with
    # -ctk/-ctv; confirm the perplexity binary accepts --kv-type.
    cmd = [
        llama_bin, "-m", model, "-f", corpus,
        "-c", str(context), "-t", str(threads),
        "--kv-type", kv_type,
    ]
    start = time.time()
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        return PerplexityResult(
            backend="llama-server", kv_type=kv_type, perplexity=None,
            is_proxy=False, elapsed_seconds=timeout, method="timeout",
            exit_code=-1, error=f"Timeout after {timeout}s",
        )
    except FileNotFoundError:
        return PerplexityResult(
            backend="llama-server", kv_type=kv_type, perplexity=None,
            is_proxy=False, method="binary not found",
            exit_code=-1, error=f"Binary not found: {llama_bin}",
        )

    elapsed = time.time() - start
    # The tool may report on either stream, so search both.
    output = result.stdout + "\n" + result.stderr
    ppl = _extract_perplexity(output)
    token_match = re.search(r"(\d+) tokens", output)
    tokens = int(token_match.group(1)) if token_match else None

    # Explain a failed run instead of returning a silent None.
    error = None
    if result.returncode != 0:
        error = f"llama-perplexity exited with code {result.returncode}"
    elif ppl is None:
        error = "Could not parse perplexity from llama-perplexity output"

    return PerplexityResult(
        backend="llama-server",
        kv_type=kv_type,
        perplexity=ppl,
        is_proxy=False,
        tokens=tokens,
        elapsed_seconds=round(elapsed, 1),
        method="llama-perplexity with --logprobs",
        exit_code=result.returncode,
        error=error,
    )
 def measure_perplexity_ollama_proxy(
    model: str, corpus: str, api_base: str = "http://localhost:11434"
 ) -> PerplexityResult:
    """Estimate a perplexity *proxy* through the Ollama generate API.

    Ollama does NOT expose token logprobs, so real perplexity cannot be
    computed. This function sends a corpus sample as a prompt and derives a
    number from the reported generation throughput (tok/s). That number is a
    PROXY metric, not real perplexity: the actual PPL delta between FP16 and
    TurboQuant cannot be validated this way. Use llama-server for real
    measurements.

    Args:
        model: Ollama model name.
        corpus: Path to a text corpus; a built-in sentence is used as the
            prompt when the path is not a regular file.
        api_base: Base URL of the Ollama HTTP API.

    Returns:
        A PerplexityResult with ``is_proxy=True``. On any request or parsing
        failure ``perplexity`` is None and ``error`` holds the message.
    """
    import urllib.request

    # Read only the first 2048 characters to keep the request fast; there is
    # no need to load the whole corpus for that.
    corpus_path = Path(corpus)
    if corpus_path.is_file():
        with corpus_path.open(encoding="utf-8", errors="replace") as handle:
            sample = handle.read(2048)
    else:
        sample = "The quick brown fox jumps over the lazy dog. " * 50

    start = time.time()
    try:
        payload = json.dumps({
            "model": model,
            "prompt": sample,
            "stream": False,
            "options": {"num_predict": 256},
        }).encode()
        req = urllib.request.Request(
            f"{api_base}/api/generate",
            data=payload,
            headers={"Content-Type": "application/json"},
        )
        # Context manager ensures the HTTP response is closed.
        with urllib.request.urlopen(req, timeout=120) as resp:
            data = json.loads(resp.read())
        elapsed = time.time() - start

        # eval_duration is divided by 1e9 to obtain seconds.
        eval_count = data.get("eval_count", 0)
        eval_duration = data.get("eval_duration", 1)
        tok_per_sec = (eval_count / (eval_duration / 1e9)) if eval_duration > 0 else 0

        # Heuristic mapping from throughput to a PPL-like number, clamped to
        # >= 1.0. This is NOT real perplexity — only a relative proxy.
        proxy_ppl = max(1.0, 50.0 / max(tok_per_sec, 1.0))
        return PerplexityResult(
            backend="ollama-proxy",
            kv_type="f16",  # Ollama manages KV internally
            perplexity=round(proxy_ppl, 2),
            is_proxy=True,
            tokens=eval_count,
            elapsed_seconds=round(elapsed, 1),
            method=f"proxy: tok/s heuristic ({tok_per_sec:.1f} tok/s)",
            exit_code=0,
        )
    except Exception as e:
        # Deliberately best-effort: any network, HTTP or decoding failure is
        # reported in the result rather than raised to the caller.
        return PerplexityResult(
            backend="ollama-proxy", kv_type="f16", perplexity=None,
            is_proxy=True, method="ollama proxy",
            exit_code=-1, error=str(e),
        )
 def run_quality_gate(
    backend: str = "llama-server",
    model: str = "",
    corpus: str = "corpora/wiki.test.raw",
    context: int = 2048,
    threads: int = 4,
    llama_bin: str = "llama.cpp-fork/build/bin/llama-perplexity",
    threshold: float = 0.5,
    ollama_base: str = "http://localhost:11434",
 ) -> QualityGateResult:
    """Measure F16 vs Turbo4 perplexity and check the delta against a threshold.

    Args:
        backend: "llama-server" (real PPL for both KV types) or "ollama"
            (F16 proxy only; no Turbo4 measurement is possible).
        model: GGUF path or Ollama model name.
        corpus: Evaluation corpus path.
        context: Context length for llama-perplexity.
        threads: Thread count for llama-perplexity.
        llama_bin: Path to the llama-perplexity binary.
        threshold: Maximum allowed ``turbo4 - f16`` perplexity delta.
        ollama_base: Base URL of the Ollama API.

    Returns:
        A QualityGateResult. ``passed`` is True only when both perplexities
        were obtained and the delta is within ``threshold``. Any measurement
        error is surfaced in ``warning``.
    """
    if backend == "llama-server":
        f16 = measure_perplexity_llama_server(llama_bin, model, corpus, context, "f16", threads)
        turbo4 = measure_perplexity_llama_server(llama_bin, model, corpus, context, "turbo4", threads)
    elif backend == "ollama":
        f16 = measure_perplexity_ollama_proxy(model, corpus, ollama_base)
        turbo4 = None  # Can't measure turbo4 via Ollama
    else:
        return QualityGateResult(
            f16=None, turbo4=None, delta=None,
            threshold=threshold, passed=False, is_proxy=True,
            warning=f"Unknown backend: {backend}",
        )

    delta = None
    passed = False
    # A missing turbo4 measurement is treated as proxy-grade evidence.
    is_proxy = f16.is_proxy or (turbo4.is_proxy if turbo4 else True)
    notes = []

    have_f16 = f16.perplexity is not None
    if have_f16 and turbo4 is not None and turbo4.perplexity is not None:
        delta = turbo4.perplexity - f16.perplexity
        passed = delta <= threshold
    elif have_f16 and turbo4 is None:
        notes.append("Only F16 measured — cannot compute delta (turbo4 not available)")

    # Surface backend failures so an INCOMPLETE result explains itself.
    for label, measurement in (("F16", f16), ("Turbo4", turbo4)):
        if measurement is not None and measurement.error:
            notes.append(f"{label} measurement failed: {measurement.error}.")

    if is_proxy:
        notes.append("PROXY measurement — not real perplexity via logprobs.")

    return QualityGateResult(
        f16=f16, turbo4=turbo4, delta=delta,
        threshold=threshold, passed=passed,
        is_proxy=is_proxy, warning=" ".join(notes),
    )
 def main():
    """CLI entry point: run the quality gate, print a summary, persist JSON.

    The results are merged into the JSON file given by ``--output``; keys
    already in that file which are not written here are kept. With
    ``--check`` the process exits with status 1 when the gate did not pass;
    otherwise it exits with status 0.
    """
    parser = argparse.ArgumentParser(description="Perplexity Quality Gate (#63)")
    parser.add_argument("--backend", choices=["llama-server", "ollama"], default="llama-server")
    parser.add_argument("--model", required=True, help="Model path (GGUF) or Ollama model name")
    parser.add_argument("--corpus", default="corpora/wiki.test.raw")
    parser.add_argument("--context", type=int, default=2048)
    parser.add_argument("--threads", type=int, default=4)
    parser.add_argument("--llama-bin", default="llama.cpp-fork/build/bin/llama-perplexity")
    parser.add_argument("--threshold", type=float, default=0.5)
    parser.add_argument("--ollama-base", default="http://localhost:11434")
    parser.add_argument("--output", default="benchmarks/perplexity_results.json")
    parser.add_argument("--check", action="store_true", help="CI mode: exit 1 if gate fails")
    args = parser.parse_args()

    result = run_quality_gate(
        backend=args.backend, model=args.model, corpus=args.corpus,
        context=args.context, threads=args.threads, llama_bin=args.llama_bin,
        threshold=args.threshold, ollama_base=args.ollama_base,
    )
    print(result.summary())

    # Merge into any existing results file so unrelated keys survive.
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    existing = {}
    if output_path.exists():
        try:
            loaded = json.loads(output_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            loaded = None
        # Valid JSON that is not an object (e.g. a list) cannot be merged
        # into; start fresh rather than crash on .update().
        if isinstance(loaded, dict):
            existing = loaded
    existing.update({
        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "model": args.model,
        "corpus": args.corpus,
        "context_length": args.context,
        "threshold": args.threshold,
        "quality_gate": result.to_dict(),
    })
    output_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")

    if args.check and not result.passed:
        sys.exit(1)
    sys.exit(0)
 if __name__ == "__main__":
    main()
--- a/benchmarks/run_benchmarks.py
+++ b/benchmarks/run_benchmarks.py
@@ -5,8 +5,16 @@ TurboQuant Benchmarking Suite — Multi-Backend (Issue #29)
 Supports Ollama and llama-server backends with KV cache type configuration.
 Measures: TTFT, tokens/sec, latency, peak memory.
 IMPORTANT — Perplexity Limitation (Issue #63):
  Ollama does NOT expose token logprobs. This means:
  - True perplexity (PPL) cannot be measured via the Ollama backend
  - The metrics here (tok/s, latency) are throughput proxies, not quality gates
  - For real perplexity measurement, use benchmarks/run_perplexity.py
    which calls llama-perplexity directly (--logprobs support)
  - The pass criterion "PPL delta <= 0.5" cannot be validated via Ollama
 Usage:
-    # Ollama (default)
+    # Ollama (default) — throughput benchmarks only, NOT perplexity
    python3 benchmarks/run_benchmarks.py --backend ollama --model llama3
    # llama-server with turbo4 KV
--- a/profiles/README.md
+++ b/profiles/README.md
@@ -135,7 +135,5 @@ llama-server -m model.gguf --port 8081 -ctk q8_0 -ctv turbo4 -c 131072
 ## References
- [TurboQuant Build Spec](../BUILD-SPEC.md)
+- [Project Status](../docs/PROJECT_STATUS.md)
 - [Phase 1 Report](../PHASE1-REPORT.md)
 - [Full Knowledge Transfer](../FULL-REPORT.md)
 - [llama.cpp TurboQuant Fork](https://github.com/TheTom/llama-cpp-turboquant)
--- a/tests/roundtrip_test.cpp
+++ b/tests/roundtrip_test.cpp
@@ -0,0 +1,104 @@
 #include "llama-turbo.h"
 #include <algorithm>
 #include <cmath>
 #include <cstdint>
 #include <iostream>
 #include <random>
 #include <stdexcept>
 #include <string>
 #include <vector>
 namespace {
 // Length of every test vector; the packed buffer in roundtrip() is kDim / 2 bytes.
 constexpr int kDim = 128;
 // Minimum cosine similarity required between input and decoded Gaussian vector.
 constexpr float kCosineThreshold = 0.99f;
 // Largest absolute value tolerated when decoding the all-zero vector.
 constexpr float kZeroTolerance = 1.0e-6f;
 // Returns true when no element is NaN or infinite (true for an empty vector).
 [[nodiscard]] bool all_finite(const std::vector<float> & values) {
    bool finite_so_far = true;
    // Stop scanning as soon as one non-finite element is seen.
    for (auto it = values.begin(); finite_so_far && it != values.end(); ++it) {
        finite_so_far = std::isfinite(*it);
    }
    return finite_so_far;
 }
 // Returns the largest absolute value in `values`, or 0 for an empty vector.
 [[nodiscard]] float max_abs(const std::vector<float> & values) {
    float largest = 0.0f;
    for (std::size_t i = 0; i < values.size(); ++i) {
        const float magnitude = std::fabs(values[i]);
        if (largest < magnitude) {
            largest = magnitude;
        }
    }
    return largest;
 }
 // Cosine similarity of two equally sized vectors.
 //
 // Returns 1.0 when both vectors have zero norm (treated as identical) and
 // 0.0 when only one of them has zero norm or when the sizes differ, so a
 // degenerate all-zero decode of a non-zero input cannot pass a similarity
 // threshold. Sums are accumulated in double to limit rounding error.
 [[nodiscard]] float cosine_similarity(const std::vector<float> & lhs, const std::vector<float> & rhs) {
    if (lhs.size() != rhs.size()) {
        return 0.0f;
    }
    double dot = 0.0;
    double lhs_sq = 0.0;
    double rhs_sq = 0.0;
    // Iterate over the actual length instead of a fixed dimension so the
    // function can never index out of bounds.
    for (std::size_t i = 0; i < lhs.size(); ++i) {
        const double a = lhs[i];
        const double b = rhs[i];
        dot += a * b;
        lhs_sq += a * a;
        rhs_sq += b * b;
    }
    if (lhs_sq == 0.0 && rhs_sq == 0.0) {
        return 1.0f;
    }
    if (lhs_sq == 0.0 || rhs_sq == 0.0) {
        return 0.0f;
    }
    return static_cast<float>(dot / (std::sqrt(lhs_sq) * std::sqrt(rhs_sq)));
 }
 // Encodes `input` with polar_quant_encode_turbo4 and immediately decodes it
 // again, returning the decoded vector. The norm written by the encoder is
 // reported through `norm_out`, which is preset to -1 before encoding.
 [[nodiscard]] std::vector<float> roundtrip(const std::vector<float> & input, float & norm_out) {
    norm_out = -1.0f;

    // The packed buffer holds kDim / 2 bytes.
    std::vector<uint8_t> packed_bytes(kDim / 2, 0);
    polar_quant_encode_turbo4(input.data(), packed_bytes.data(), &norm_out, kDim);

    std::vector<float> restored(kDim, 0.0f);
    polar_quant_decode_turbo4(packed_bytes.data(), restored.data(), norm_out, kDim);
    return restored;
 }
 // Test assertion helper: throws std::runtime_error carrying `message` when
 // `condition` is false; otherwise does nothing.
 void require(bool condition, const std::string & message) {
    if (condition) {
        return;
    }
    throw std::runtime_error(message);
 }
 // An all-zero input must encode with norm 0 and decode to finite values
 // whose magnitude stays within kZeroTolerance.
 void test_zero_vector_roundtrip() {
    const std::vector<float> zero_input(kDim, 0.0f);
    float encoded_norm = -1.0f;
    const std::vector<float> restored = roundtrip(zero_input, encoded_norm);

    require(encoded_norm == 0.0f, "zero vector should encode with zero norm");
    require(all_finite(restored), "zero vector decode produced non-finite values");
    require(max_abs(restored) <= kZeroTolerance, "zero vector decode should remain near zero");
 }
 // A standard-normal vector drawn from a fixed seed must survive the
 // encode/decode roundtrip with cosine similarity >= kCosineThreshold.
 void test_gaussian_roundtrip_quality() {
    std::mt19937 generator(12345);  // fixed seed keeps the test input stable
    std::normal_distribution<float> gaussian(0.0f, 1.0f);

    std::vector<float> original(kDim, 0.0f);
    for (std::size_t i = 0; i < original.size(); ++i) {
        original[i] = gaussian(generator);
    }

    float encoded_norm = -1.0f;
    const std::vector<float> restored = roundtrip(original, encoded_norm);

    require(encoded_norm > 0.0f, "random vector should encode with positive norm");
    require(all_finite(restored), "random vector decode produced non-finite values");

    const float similarity = cosine_similarity(original, restored);
    require(similarity >= kCosineThreshold, "roundtrip cosine similarity below threshold");
 }
 }  // namespace
 // Runs every roundtrip test. Prints a PASS line and returns 0 when all
 // succeed; prints the first failure message to stderr and returns 1 otherwise.
 int main() {
    int exit_status = 0;
    try {
        test_zero_vector_roundtrip();
        test_gaussian_roundtrip_quality();
        std::cout << "PASS: turboquant standalone roundtrip tests\n";
    } catch (const std::exception & exc) {
        std::cerr << "FAIL: " << exc.what() << '\n';
        exit_status = 1;
    }
    return exit_status;
 }
--- a/tests/test_quality_gate.py
+++ b/tests/test_quality_gate.py
@@ -0,0 +1,117 @@
 #!/usr/bin/env python3
 """Tests for benchmarks/quality_gate.py — Perplexity Quality Gate (#63)."""
 import json
 import os
 import sys
 import tempfile
 import textwrap
 from pathlib import Path
 from unittest.mock import patch, MagicMock
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "benchmarks"))
 from quality_gate import (
    PerplexityResult,
    QualityGateResult,
    measure_perplexity_ollama_proxy,
    run_quality_gate,
 )
 class TestPerplexityResult:
    """Field and serialisation checks for PerplexityResult."""

    def test_to_dict(self):
        """to_dict() exposes the constructor values under their field names."""
        measurement = PerplexityResult(
            backend="llama-server",
            kv_type="f16",
            perplexity=12.5,
            is_proxy=False,
            tokens=1000,
            elapsed_seconds=10.0,
            method="llama-perplexity",
            exit_code=0,
        )
        as_dict = measurement.to_dict()
        assert as_dict["backend"] == "llama-server"
        assert as_dict["perplexity"] == 12.5
        assert as_dict["is_proxy"] is False

    def test_proxy_flag(self):
        """The is_proxy flag is stored as given."""
        measurement = PerplexityResult(
            backend="ollama-proxy",
            kv_type="f16",
            perplexity=3.2,
            is_proxy=True,
            method="proxy heuristic",
        )
        assert measurement.is_proxy is True
 class TestQualityGateResult:
    """Construction, summary and serialisation checks for QualityGateResult."""

    @staticmethod
    def _llama(kv_type, ppl):
        """Build a real (non-proxy) llama-server measurement."""
        return PerplexityResult("llama-server", kv_type, ppl, False)

    def test_pass(self):
        gate = QualityGateResult(
            f16=self._llama("f16", 10.0),
            turbo4=self._llama("turbo4", 10.3),
            delta=0.3, threshold=0.5, passed=True, is_proxy=False,
        )
        assert gate.passed is True
        assert gate.delta == 0.3

    def test_fail(self):
        gate = QualityGateResult(
            f16=self._llama("f16", 10.0),
            turbo4=self._llama("turbo4", 11.0),
            delta=1.0, threshold=0.5, passed=False, is_proxy=False,
        )
        assert gate.passed is False

    def test_proxy_warning(self):
        proxy_f16 = PerplexityResult("ollama-proxy", "f16", 5.0, True)
        gate = QualityGateResult(
            f16=proxy_f16, turbo4=None, delta=None, threshold=0.5,
            passed=False, is_proxy=True, warning="Only F16 measured",
        )
        assert gate.is_proxy is True
        text = gate.summary()
        assert "PROXY" in text or "Proxy" in text

    def test_to_dict(self):
        gate = QualityGateResult(
            f16=self._llama("f16", 10.0), turbo4=None, delta=None,
            threshold=0.5, passed=False, is_proxy=False,
        )
        payload = gate.to_dict()
        assert payload["f16"]["perplexity"] == 10.0
        assert payload["turbo4"] is None
        assert payload["delta"] is None

    def test_summary_format(self):
        gate = QualityGateResult(
            f16=self._llama("f16", 10.0),
            turbo4=self._llama("turbo4", 10.2),
            delta=0.2, threshold=0.5, passed=True, is_proxy=False,
        )
        text = gate.summary()
        for fragment in ("F16", "Turbo4", "PASS", "0.2000"):
            assert fragment in text
 class TestOllamaProxy:
    """Hermetic tests for measure_perplexity_ollama_proxy.

    urllib.request.urlopen is patched so no Ollama server is needed and the
    tests never wait on a real network timeout.
    """

    @staticmethod
    def _fake_response(payload):
        """Return a stand-in HTTP response whose read() yields `payload` as JSON.

        It also works as a context manager, so it serves callers that use
        ``with urlopen(...)`` as well as callers that call read() directly.
        """
        response = MagicMock()
        response.read.return_value = json.dumps(payload).encode()
        response.__enter__.return_value = response
        return response

    def test_with_corpus_file(self):
        # 100 tokens in 2 s -> 50 tok/s -> proxy PPL = max(1.0, 50 / 50) = 1.0
        fake = self._fake_response({"eval_count": 100, "eval_duration": 2_000_000_000})
        # TemporaryDirectory cleans up even if the measurement raises.
        with tempfile.TemporaryDirectory() as tmp_dir:
            corpus = Path(tmp_dir) / "corpus.txt"
            corpus.write_text("The quick brown fox jumps over the lazy dog.\n" * 100)
            with patch("urllib.request.urlopen", return_value=fake) as urlopen:
                result = measure_perplexity_ollama_proxy("test-model", str(corpus))
        assert urlopen.called
        assert result.is_proxy is True
        assert result.backend == "ollama-proxy"
        assert result.error is None
        assert result.tokens == 100
        assert result.perplexity == 1.0

    def test_with_missing_corpus(self):
        fake = self._fake_response({"eval_count": 10, "eval_duration": 1_000_000_000})
        with patch("urllib.request.urlopen", return_value=fake) as urlopen:
            result = measure_perplexity_ollama_proxy("test-model", "/nonexistent/corpus.txt")
        assert result.is_proxy is True
        assert result.error is None
        # The built-in fallback sentence must have been sent as the prompt.
        request = urlopen.call_args[0][0]
        prompt = json.loads(request.data)["prompt"]
        assert prompt.startswith("The quick brown fox")

    def test_connection_failure_is_reported(self):
        with patch("urllib.request.urlopen", side_effect=OSError("connection refused")):
            result = measure_perplexity_ollama_proxy("test-model", "/nonexistent/corpus.txt")
        assert result.is_proxy is True
        assert result.perplexity is None
        assert "connection refused" in result.error
 class TestRunQualityGate:
    """End-to-end checks of run_quality_gate on paths that need no backend."""

    def test_unknown_backend(self):
        """An unrecognised backend fails the gate and names the problem."""
        gate = run_quality_gate(backend="unknown", model="test")
        assert gate.passed is False
        assert "Unknown backend" in gate.warning

    def test_llama_server_missing_binary(self):
        """A missing llama-perplexity binary is reported on the F16 result."""
        options = {
            "backend": "llama-server",
            "model": "test.gguf",
            "corpus": "/tmp/nonexistent_corpus.txt",
            "llama_bin": "/nonexistent/llama-perplexity",
        }
        gate = run_quality_gate(**options)
        assert gate.f16 is not None
        assert gate.f16.error is not None
        assert "not found" in gate.f16.error.lower()
 # Allows running this file directly as a script.
 # NOTE(review): the test classes above are plain classes, not
 # unittest.TestCase subclasses, so unittest.main() will likely collect
 # nothing here — confirm, and prefer running this file through pytest.
 if __name__ == "__main__":
    import unittest
    unittest.main()
Author	SHA1	Message	Date
Alexander Whitestone	fa9d4d569b	fix: add perplexity limitation docs to run_benchmarks.py (#63 ) All checks were successful Smoke Test / smoke (pull_request) Successful in 14s Details	2026-04-16 02:53:13 +00:00
Alexander Whitestone	ea7f89cc2d	test: perplexity quality gate tests (#63 )	2026-04-16 02:52:21 +00:00
Alexander Whitestone	aa4bd38acf	feat: perplexity quality gate with Ollama proxy fallback (#63 )	2026-04-16 02:52:19 +00:00
Timmy Time	3cd8750cbb	Merge pull request 'feat: standalone build system and roundtrip tests - #17 ' (#51 ) from dispatch/17-1776180746 into main All checks were successful Smoke Test / smoke (pull_request) Successful in 15s Details	2026-04-15 11:57:58 +00:00
Timmy Time	ef765bbd30	Merge pull request 'fix(docs): resolve broken markdown links and stale forge URL' (#52 ) from burn/fix-doc-links into main	2026-04-15 11:57:55 +00:00
Hermes Agent	5f0d00f127	fix(docs): resolve broken markdown links and stale forge URL All checks were successful Smoke Test / smoke (pull_request) Successful in 6s Details - Update raw-IP forge URL to canonical forge domain in README.md (fixes #46) - Update 4 broken local markdown links pointing to deleted BUILD-SPEC.md, PHASE1-REPORT.md, FULL-REPORT.md to docs/PROJECT_STATUS.md (fixes #44)	2026-04-14 18:07:25 -04:00
Alexander Whitestone	8affe79489	cleanup: remove committed .pyc and redundant Python test, add .gitignore All checks were successful Smoke Test / smoke (pull_request) Successful in 11s Details	2026-04-14 11:34:38 -04:00
Alexander Whitestone	319f57780d	feat: add standalone build system and roundtrip tests (Issue #17 ) - CMakeLists.txt: builds turboquant as static library - TURBOQUANT_BUILD_TESTS option enables ctest roundtrip tests - tests/roundtrip_test.cpp: validates zero-vector roundtrip and gaussian cosine similarity (>=0.99) - Makefile wrapper for convenience (build/test/clean targets) - Addresses contributor feedback on spec-to-code gap and CI from #17	2026-04-14 11:34:38 -04:00