Compare commits
2 Commits
fix/72-con ... q/17-17761

| Author | SHA1 | Date |
|---|---|---|
|  | 9ce1e3d71b |  |
|  | 6818dc7652 |  |
.gitignore (vendored, new file, 3 lines added)
@@ -0,0 +1,3 @@
build/
*.pyc
__pycache__/
CMakeLists.txt (new file, 36 lines added)
@@ -0,0 +1,36 @@
cmake_minimum_required(VERSION 3.16)

project(turboquant LANGUAGES CXX)

option(TURBOQUANT_BUILD_TESTS "Build standalone TurboQuant validation tests" ON)

add_library(turboquant STATIC
    llama-turbo.cpp
)

target_include_directories(turboquant PUBLIC
    ${CMAKE_CURRENT_SOURCE_DIR}
)

target_compile_features(turboquant PUBLIC cxx_std_17)

if(MSVC)
    target_compile_options(turboquant PRIVATE /W4)
else()
    target_compile_options(turboquant PRIVATE -Wall -Wextra -Wpedantic)
endif()

if(TURBOQUANT_BUILD_TESTS)
    include(CTest)

    add_executable(turboquant_roundtrip_test
        tests/roundtrip_test.cpp
    )
    target_link_libraries(turboquant_roundtrip_test PRIVATE turboquant)
    target_compile_features(turboquant_roundtrip_test PRIVATE cxx_std_17)

    add_test(
        NAME turboquant_roundtrip
        COMMAND turboquant_roundtrip_test
    )
endif()
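For reference, the new targets can be driven with a standard out-of-source CMake flow. The sketch below is illustrative only: the build/ directory name and the use of Python's subprocess as a driver are assumptions, not part of this change.

# Illustrative build-and-test driver (assumes cmake and ctest are on PATH; run from the repo root).
import subprocess

subprocess.run(["cmake", "-S", ".", "-B", "build", "-DTURBOQUANT_BUILD_TESTS=ON"], check=True)
subprocess.run(["cmake", "--build", "build"], check=True)
subprocess.run(["ctest", "--output-on-failure"], cwd="build", check=True)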
benchmarks/constant_time_benchmark.py (deleted, 319 lines)
@@ -1,319 +0,0 @@
#!/usr/bin/env python3
"""
TurboQuant Constant-Time Benchmark — Issue #72

Benchmarks constant-time (side-channel resistant) vs original quantization.
Measures encode latency, decode latency, and memory bandwidth impact.

Usage:
    python3 benchmarks/constant_time_benchmark.py --size 4096 --iterations 100
    python3 benchmarks/constant_time_benchmark.py --json
"""

import argparse
import json
import os
import statistics
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Callable

# ---------------------------------------------------------------------------
# Quantization kernels (Python reference implementations)
# ---------------------------------------------------------------------------

import struct
import math


def quantize_fp16_to_q4_0_original(weights: list[float]) -> bytes:
    """Original quantization: FP16 → Q4_0 (block size 32).

    Each block: 2 bytes scale (FP16) + 16 bytes quants (4-bit packed).
    Non-constant-time: early exits, branching on zero detection.
    """
    block_size = 32
    n_blocks = len(weights) // block_size
    output = bytearray()

    for b in range(n_blocks):
        block = weights[b * block_size:(b + 1) * block_size]

        # Find absmax
        absmax = 0.0
        for w in block:
            absmax = max(absmax, abs(w))

        if absmax == 0.0:
            # Early exit — branch prediction leak
            output.extend(struct.pack('<e', 0.0))
            output.extend(bytes(16))
            continue

        d = absmax / 7.0  # scale
        id_val = 1.0 / d if d != 0 else 0.0  # Branch on zero

        # Pack 4-bit quants
        packed = bytearray(16)
        for i in range(0, block_size, 2):
            xi0 = int(round(block[i] * id_val)) + 8
            xi1 = (int(round(block[i + 1] * id_val)) + 8) if i + 1 < block_size else 8
            xi0 = max(0, min(15, xi0))
            xi1 = max(0, min(15, xi1))
            packed[i // 2] = xi0 | (xi1 << 4)

        output.extend(struct.pack('<e', d))
        output.extend(packed)

    return bytes(output)


def quantize_fp16_to_q4_0_constant_time(weights: list[float]) -> bytes:
    """Constant-time quantization: FP16 → Q4_0.

    No early exits, no branches on data values. Same output as original
    but timing does not leak information about weight distribution.
    """
    block_size = 32
    n_blocks = len(weights) // block_size
    output = bytearray()

    for b in range(n_blocks):
        block = weights[b * block_size:(b + 1) * block_size]

        # Find absmax — no early exit on zero
        absmax = 0.0
        for w in block:
            absval = abs(w)
            # Constant-time max: no branch, always compute both paths
            absmax = absval if absval > absmax else absmax

        # Constant-time scale computation — no branch on zero
        d = absmax / 7.0
        # Constant-time inverse: compute 1/d but guard against zero
        d_nonzero = 1.0 if d != 0.0 else 0.0
        safe_d = d if d != 0.0 else 1.0  # Avoid division by zero
        id_val = (1.0 / safe_d) * d_nonzero

        # Always compute quants (even when scale=0, producing all zeros)
        packed = bytearray(16)
        for i in range(0, block_size, 2):
            xi0 = int(round(block[i] * id_val)) + 8
            xi1 = (int(round(block[i + 1] * id_val)) + 8) if i + 1 < block_size else 8
            # Constant-time clamp: no branch
            xi0 = max(0, min(15, xi0))
            xi1 = max(0, min(15, xi1))
            packed[i // 2] = xi0 | (xi1 << 4)

        output.extend(struct.pack('<e', d))
        output.extend(packed)

    return bytes(output)


def dequantize_q4_0_original(data: bytes, n: int) -> list[float]:
    """Original dequantization: Q4_0 → FP32."""
    block_size = 32
    bytes_per_block = 18  # 2 scale + 16 quants
    n_blocks = n // block_size
    weights = []

    for b in range(n_blocks):
        offset = b * bytes_per_block
        d = struct.unpack_from('<e', data, offset)[0]
        quants = data[offset + 2:offset + 18]

        for i in range(16):
            byte_val = quants[i]
            xi0 = (byte_val & 0x0F) - 8
            xi1 = ((byte_val >> 4) & 0x0F) - 8
            weights.append(xi0 * d)
            if len(weights) < n:
                weights.append(xi1 * d)

    return weights[:n]


def dequantize_q4_0_constant_time(data: bytes, n: int) -> list[float]:
    """Constant-time dequantization: Q4_0 → FP32."""
    block_size = 32
    bytes_per_block = 18
    n_blocks = n // block_size
    weights = []

    for b in range(n_blocks):
        offset = b * bytes_per_block
        d = struct.unpack_from('<e', data, offset)[0]
        quants = data[offset + 2:offset + 18]

        # Always process all 16 bytes, even if we've exceeded n
        for i in range(16):
            byte_val = quants[i]
            xi0 = (byte_val & 0x0F) - 8
            xi1 = ((byte_val >> 4) & 0x0F) - 8
            if len(weights) < n:
                weights.append(xi0 * d)
            if len(weights) < n:
                weights.append(xi1 * d)

    return weights[:n]


# ---------------------------------------------------------------------------
# Benchmark harness
# ---------------------------------------------------------------------------

def benchmark(fn: Callable, args: tuple, iterations: int) -> dict:
    """Benchmark a function over N iterations."""
    # Warmup
    for _ in range(min(3, iterations)):
        fn(*args)

    latencies = []
    for _ in range(iterations):
        start = time.perf_counter()
        fn(*args)
        elapsed = time.perf_counter() - start
        latencies.append(elapsed * 1000)  # ms

    return {
        "iterations": iterations,
        "mean_ms": round(statistics.mean(latencies), 4),
        "median_ms": round(statistics.median(latencies), 4),
        "std_ms": round(statistics.stdev(latencies) if len(latencies) > 1 else 0, 4),
        "min_ms": round(min(latencies), 4),
        "max_ms": round(max(latencies), 4),
        "p95_ms": round(sorted(latencies)[int(len(latencies) * 0.95)], 4),
        "p99_ms": round(sorted(latencies)[int(len(latencies) * 0.99)], 4),
    }


def generate_weights(size: int) -> list[float]:
    """Generate test weights."""
    import random
    random.seed(42)
    return [random.gauss(0, 1) for _ in range(size)]


def run_benchmarks(size: int, iterations: int) -> dict:
    """Run full benchmark suite."""
    weights = generate_weights(size)

    print(f"Benchmarking {size} weights x {iterations} iterations...", file=sys.stderr)

    # Encode benchmarks
    print(" Encode original...", file=sys.stderr)
    encode_orig = benchmark(quantize_fp16_to_q4_0_original, (weights,), iterations)

    print(" Encode constant-time...", file=sys.stderr)
    encode_ct = benchmark(quantize_fp16_to_q4_0_constant_time, (weights,), iterations)

    # Decode benchmarks
    encoded_orig = quantize_fp16_to_q4_0_original(weights)
    print(" Decode original...", file=sys.stderr)
    decode_orig = benchmark(dequantize_q4_0_original, (encoded_orig, size), iterations)

    encoded_ct = quantize_fp16_to_q4_0_constant_time(weights)
    print(" Decode constant-time...", file=sys.stderr)
    decode_ct = benchmark(dequantize_q4_0_constant_time, (encoded_ct, size), iterations)

    # Correctness check
    decoded_orig = dequantize_q4_0_original(encoded_orig, size)
    decoded_ct = dequantize_q4_0_constant_time(encoded_ct, size)
    max_diff = max(abs(a - b) for a, b in zip(decoded_orig, decoded_ct))

    # Overhead analysis
    encode_overhead = (encode_ct["mean_ms"] / max(encode_orig["mean_ms"], 0.001) - 1) * 100
    decode_overhead = (decode_ct["mean_ms"] / max(decode_orig["mean_ms"], 0.001) - 1) * 100

    return {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "config": {"weight_count": size, "iterations": iterations, "block_size": 32},
        "encode": {"original": encode_orig, "constant_time": encode_ct},
        "decode": {"original": decode_orig, "constant_time": decode_ct},
        "correctness": {
            "max_decode_diff": round(max_diff, 10),
            "outputs_match": max_diff < 1e-6,
        },
        "overhead": {
            "encode_pct": round(encode_overhead, 2),
            "decode_pct": round(decode_overhead, 2),
        },
        "memory": {
            "original_bytes": len(encoded_orig),
            "constant_time_bytes": len(encoded_ct),
            "compression_ratio": round(size * 4 / len(encoded_orig), 2),
        },
    }


def to_markdown(report: dict) -> str:
    enc = report["encode"]
    dec = report["decode"]
    ov = report["overhead"]
    mem = report["memory"]
    cor = report["correctness"]

    lines = [
        "# Constant-Time Benchmark Report",
        "",
        f"Generated: {report['generated_at'][:16]}",
        f"Config: {report['config']['weight_count']} weights, {report['config']['iterations']} iterations",
        "",
        "## Encode Latency",
        "",
        "| Impl | Mean (ms) | Median | P95 | P99 | Overhead |",
        "|------|-----------|--------|-----|-----|----------|",
        f"| Original | {enc['original']['mean_ms']:.2f} | {enc['original']['median_ms']:.2f} | {enc['original']['p95_ms']:.2f} | {enc['original']['p99_ms']:.2f} | baseline |",
        f"| Constant-time | {enc['constant_time']['mean_ms']:.2f} | {enc['constant_time']['median_ms']:.2f} | {enc['constant_time']['p95_ms']:.2f} | {enc['constant_time']['p99_ms']:.2f} | +{ov['encode_pct']:.1f}% |",
        "",
        "## Decode Latency",
        "",
        "| Impl | Mean (ms) | Median | P95 | P99 | Overhead |",
        "|------|-----------|--------|-----|-----|----------|",
        f"| Original | {dec['original']['mean_ms']:.2f} | {dec['original']['median_ms']:.2f} | {dec['original']['p95_ms']:.2f} | {dec['original']['p99_ms']:.2f} | baseline |",
        f"| Constant-time | {dec['constant_time']['mean_ms']:.2f} | {dec['constant_time']['median_ms']:.2f} | {dec['constant_time']['p95_ms']:.2f} | {dec['constant_time']['p99_ms']:.2f} | +{ov['decode_pct']:.1f}% |",
        "",
        "## Correctness",
        "",
        f"- Max decode difference: {cor['max_decode_diff']:.10f}",
        f"- Outputs match: {'✅ Yes' if cor['outputs_match'] else '❌ No'}",
        "",
        "## Memory",
        "",
        f"- Compressed size: {mem['original_bytes']} bytes ({mem['compression_ratio']:.1f}x compression)",
        f"- Constant-time size: {mem['constant_time_bytes']} bytes (same format)",
        "",
        "## Verdict",
        "",
    ]

    if ov['encode_pct'] < 10 and ov['decode_pct'] < 10:
        lines.append("**Constant-time overhead is acceptable (<10%).** Safe for production.")
    elif ov['encode_pct'] < 25 and ov['decode_pct'] < 25:
        lines.append("**Constant-time overhead is moderate (10-25%).** Acceptable for security-sensitive deployments.")
    else:
        lines.append("**Constant-time overhead is significant (>25%).** Consider optimizing or using original for non-sensitive workloads.")

    return "\n".join(lines)


def main():
    parser = argparse.ArgumentParser(description="Constant-time benchmark")
    parser.add_argument("--size", type=int, default=4096, help="Weight count")
    parser.add_argument("--iterations", type=int, default=100, help="Iterations")
    parser.add_argument("--json", action="store_true", help="JSON output")
    args = parser.parse_args()

    report = run_benchmarks(args.size, args.iterations)

    if args.json:
        print(json.dumps(report, indent=2))
    else:
        print(to_markdown(report))


if __name__ == "__main__":
    main()
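For orientation, the Q4_0 layout used throughout the deleted benchmark stores each 32-weight block as one FP16 scale plus 16 bytes of packed 4-bit quants, i.e. 18 bytes per block. A minimal sketch of that size arithmetic and of the nibble packing (the helper name is illustrative, not from the benchmark):

# 18 bytes per 32-weight block: 2-byte FP16 scale + 16 packed bytes (illustrative helper).
def q4_0_encoded_size(n_weights: int) -> int:
    return (n_weights // 32) * (2 + 16)

assert q4_0_encoded_size(4096) == 2304                # 128 blocks * 18 bytes
print(round(4096 * 4 / q4_0_encoded_size(4096), 2))   # ~7.11x smaller than FP32 storage

# Two 4-bit quants share one byte, exactly as the kernels above pack and unpack them.
xi0, xi1 = 3, 12
byte_val = xi0 | (xi1 << 4)
assert (byte_val & 0x0F, (byte_val >> 4) & 0x0F) == (xi0, xi1)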
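The constant-time variants above are Python reference implementations, so their conditional expressions still branch in the interpreter; what they model is a branch-free select, which the C++ kernels would express with bit masks. A rough sketch of that idea, for illustration only (the 0/1 condition bits here still come from ordinary comparisons, and CPython gives no timing guarantees, so any real constant-time claim has to be validated on the C++ side):

def ct_select(cond_bit: int, a: int, b: int) -> int:
    # cond_bit must be 0 or 1; -cond_bit is an all-ones mask when cond_bit == 1
    mask = -cond_bit
    return (a & mask) | (b & ~mask)

def ct_clamp_0_15(x: int) -> int:
    # Clamp to [0, 15] by always evaluating both candidates, mirroring the 4-bit clamp above
    x = ct_select(int(x < 0), 0, x)
    return ct_select(int(x > 15), 15, x)

assert ct_clamp_0_15(-3) == 0 and ct_clamp_0_15(7) == 7 and ct_clamp_0_15(20) == 15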
tests/roundtrip_test.cpp (new file, 104 lines added)
@@ -0,0 +1,104 @@
#include "llama-turbo.h"

#include <algorithm>  // std::max
#include <cmath>
#include <cstdint>
#include <iostream>
#include <random>
#include <stdexcept>  // std::runtime_error
#include <string>
#include <vector>

namespace {

constexpr int kDim = 128;
constexpr float kCosineThreshold = 0.99f;
constexpr float kZeroTolerance = 1.0e-6f;

[[nodiscard]] bool all_finite(const std::vector<float> & values) {
    for (float value : values) {
        if (!std::isfinite(value)) {
            return false;
        }
    }
    return true;
}

[[nodiscard]] float max_abs(const std::vector<float> & values) {
    float best = 0.0f;
    for (float value : values) {
        best = std::max(best, std::fabs(value));
    }
    return best;
}

[[nodiscard]] float cosine_similarity(const std::vector<float> & lhs, const std::vector<float> & rhs) {
    float dot = 0.0f;
    float lhs_norm = 0.0f;
    float rhs_norm = 0.0f;
    for (int i = 0; i < kDim; ++i) {
        dot += lhs[i] * rhs[i];
        lhs_norm += lhs[i] * lhs[i];
        rhs_norm += rhs[i] * rhs[i];
    }

    const float denom = std::sqrt(lhs_norm) * std::sqrt(rhs_norm);
    return denom == 0.0f ? 1.0f : dot / denom;
}

[[nodiscard]] std::vector<float> roundtrip(const std::vector<float> & input, float & norm_out) {
    std::vector<uint8_t> packed(kDim / 2, 0);
    norm_out = -1.0f;
    polar_quant_encode_turbo4(input.data(), packed.data(), &norm_out, kDim);

    std::vector<float> decoded(kDim, 0.0f);
    polar_quant_decode_turbo4(packed.data(), decoded.data(), norm_out, kDim);
    return decoded;
}

void require(bool condition, const std::string & message) {
    if (!condition) {
        throw std::runtime_error(message);
    }
}

void test_zero_vector_roundtrip() {
    std::vector<float> zeros(kDim, 0.0f);
    float norm = -1.0f;
    const auto decoded = roundtrip(zeros, norm);

    require(norm == 0.0f, "zero vector should encode with zero norm");
    require(all_finite(decoded), "zero vector decode produced non-finite values");
    require(max_abs(decoded) <= kZeroTolerance, "zero vector decode should remain near zero");
}

void test_gaussian_roundtrip_quality() {
    std::mt19937 rng(12345);
    std::normal_distribution<float> dist(0.0f, 1.0f);

    std::vector<float> input(kDim, 0.0f);
    for (float & value : input) {
        value = dist(rng);
    }

    float norm = -1.0f;
    const auto decoded = roundtrip(input, norm);

    require(norm > 0.0f, "random vector should encode with positive norm");
    require(all_finite(decoded), "random vector decode produced non-finite values");

    const float cosine = cosine_similarity(input, decoded);
    require(cosine >= kCosineThreshold, "roundtrip cosine similarity below threshold");
}

} // namespace

int main() {
    try {
        test_zero_vector_roundtrip();
        test_gaussian_roundtrip_quality();
        std::cout << "PASS: turboquant standalone roundtrip tests\n";
        return 0;
    } catch (const std::exception & exc) {
        std::cerr << "FAIL: " << exc.what() << '\n';
        return 1;
    }
}

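The quality gate in this test is the cosine similarity between the random input and its decoded roundtrip, with the zero-denominator case defined as 1.0. A small Python mirror of that check, for illustration only (the real gate is the C++ helper above):

import math

def cosine_similarity(a, b):
    dot = sum(x * y for x, y in zip(a, b))
    denom = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    return 1.0 if denom == 0.0 else dot / denom

assert cosine_similarity([0.0] * 4, [0.0] * 4) == 1.0         # zero-vector convention
print(cosine_similarity([1.0, 2.0, 3.0], [1.05, 1.95, 3.1]))  # close to 1.0 for small error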
@@ -1,118 +0,0 @@
"""Tests for constant-time benchmark (Issue #72)."""

import json
import sys
from pathlib import Path

import pytest

sys.path.insert(0, str(Path(__file__).parent.parent / "benchmarks"))

from constant_time_benchmark import (
    quantize_fp16_to_q4_0_original,
    quantize_fp16_to_q4_0_constant_time,
    dequantize_q4_0_original,
    dequantize_q4_0_constant_time,
    benchmark,
    generate_weights,
    to_markdown,
)


class TestQuantize:
    def test_original_produces_output(self):
        weights = [0.1, -0.2, 0.3] * 11  # 33 -> truncate to 32
        result = quantize_fp16_to_q4_0_original(weights[:32])
        assert len(result) == 18  # 1 block = 2 + 16

    def test_constant_time_produces_output(self):
        weights = [0.1, -0.2, 0.3] * 11
        result = quantize_fp16_to_q4_0_constant_time(weights[:32])
        assert len(result) == 18

    def test_zero_weights(self):
        weights = [0.0] * 32
        orig = quantize_fp16_to_q4_0_original(weights)
        ct = quantize_fp16_to_q4_0_constant_time(weights)
        assert len(orig) == len(ct)

    def test_multiple_blocks(self):
        weights = [0.1 * i for i in range(128)]  # 4 blocks
        result = quantize_fp16_to_q4_0_constant_time(weights)
        assert len(result) == 4 * 18


class TestDequantize:
    def test_roundtrip_original(self):
        weights = [0.1 * i for i in range(32)]
        encoded = quantize_fp16_to_q4_0_original(weights)
        decoded = dequantize_q4_0_original(encoded, 32)
        assert len(decoded) == 32
        # Q4 is very lossy with small weights — just check structure is correct
        assert all(isinstance(w, float) for w in decoded)

    def test_roundtrip_constant_time(self):
        weights = [0.1 * i for i in range(32)]
        encoded = quantize_fp16_to_q4_0_constant_time(weights)
        decoded = dequantize_q4_0_constant_time(encoded, 32)
        assert len(decoded) == 32
        assert all(isinstance(w, float) for w in decoded)

    def test_outputs_match(self):
        # Use non-zero weights to avoid the zero-scalar early-exit divergence
        weights = [0.5, -0.3, 0.8, 0.1] * 8
        orig_enc = quantize_fp16_to_q4_0_original(weights)
        ct_enc = quantize_fp16_to_q4_0_constant_time(weights)
        orig_dec = dequantize_q4_0_original(orig_enc, 32)
        ct_dec = dequantize_q4_0_constant_time(ct_enc, 32)
        # Q4 quantization is lossy — outputs won't match exactly
        # but both should produce valid floats
        assert len(orig_dec) == len(ct_dec)
        assert all(isinstance(w, float) for w in orig_dec)
        assert all(isinstance(w, float) for w in ct_dec)


class TestBenchmark:
    def test_returns_stats(self):
        result = benchmark(lambda x: x * 2, (5,), 10)
        assert "mean_ms" in result
        assert "median_ms" in result
        assert result["iterations"] == 10

    def test_positive_latencies(self):
        result = benchmark(lambda: sum(range(1000)), (), 5)
        assert result["mean_ms"] > 0


class TestGenerateWeights:
    def test_correct_size(self):
        w = generate_weights(128)
        assert len(w) == 128

    def test_deterministic(self):
        w1 = generate_weights(64)
        w2 = generate_weights(64)
        assert w1 == w2


class TestMarkdown:
    def test_has_sections(self):
        report = {
            "generated_at": "2026-04-14T00:00:00",
            "config": {"weight_count": 4096, "iterations": 100, "block_size": 32},
            "encode": {
                "original": {"mean_ms": 1.0, "median_ms": 1.0, "p95_ms": 1.5, "p99_ms": 2.0},
                "constant_time": {"mean_ms": 1.1, "median_ms": 1.1, "p95_ms": 1.6, "p99_ms": 2.1},
            },
            "decode": {
                "original": {"mean_ms": 0.5, "median_ms": 0.5, "p95_ms": 0.7, "p99_ms": 0.9},
                "constant_time": {"mean_ms": 0.55, "median_ms": 0.55, "p95_ms": 0.75, "p99_ms": 0.95},
            },
            "correctness": {"max_decode_diff": 0.0, "outputs_match": True},
            "overhead": {"encode_pct": 10.0, "decode_pct": 10.0},
            "memory": {"original_bytes": 2304, "constant_time_bytes": 2304, "compression_ratio": 5.69},
        }
        md = to_markdown(report)
        assert "Encode Latency" in md
        assert "Decode Latency" in md
        assert "Correctness" in md