Files
turboquant/llama-turbo.h

28 lines
641 B
C++

#ifndef LLAMA_TURBO_H
#define LLAMA_TURBO_H
#include <cstdint>
#ifdef __cplusplus
extern "C" {
#endif
// PolarQuant Turbo4 (4-bit)
// d: dimension (must be power of 2, e.g., 128)
// src: input float array [d]
// dst: output packed 4-bit indices [d/2]
// norm: output L2 norm (radius)
void polar_quant_encode_turbo4(const float* src, uint8_t* dst, float* norm, int d);
// PolarQuant Turbo4 Decode
// src: input packed 4-bit indices [d/2]
// dst: output float array [d]
// norm: input L2 norm (radius)
void polar_quant_decode_turbo4(const uint8_t* src, float* dst, float norm, int d);
#ifdef __cplusplus
}
#endif
#endif // LLAMA_TURBO_H