// ggml-metal-turbo.h — TurboQuant Metal kernel registration
// Integrates ggml-metal-turbo.metal kernels into llama.cpp's Metal backend
//
// Usage: Call ggml_metal_turbo_register(device) after ggml_metal_init()
// to load and register TurboQuant kernels with the Metal backend.

#ifndef GGML_METAL_TURBO_H
#define GGML_METAL_TURBO_H

// Provides `bool` for the prototypes below when this header is compiled as C.
#include <stdbool.h>

#ifdef __cplusplus
extern "C" {
#endif

// Opaque forward declarations matching ggml-metal internals.
// Only pointers to these types appear in this header, so their layout is
// never needed here.
struct ggml_backend_metal_device;
// NOTE(review): no prototype in this header currently takes a
// ggml_metal_context; the declaration is kept so existing includers that rely
// on it keep compiling.
struct ggml_metal_context;

// TurboQuant kernel indices (registered in ggml-metal kernel array).
// Values are contiguous starting at 0, so GGML_METAL_TURBO_KERNEL_COUNT is
// the number of kernels and can be used to size per-kernel tables.
enum ggml_metal_turbo_kernel {
    GGML_METAL_TURBO_KERNEL_FWHT_128 = 0,
    GGML_METAL_TURBO_KERNEL_TURBO4_DEQUANT,
    GGML_METAL_TURBO_KERNEL_ATTENTION_TURBO4,
    GGML_METAL_TURBO_KERNEL_COUNT
};

// Register TurboQuant Metal kernels.
//
// device: the Metal backend device to register the kernels with.
//
// Returns true on success, false if Metal unavailable or compilation failed.
// Must be called after ggml_metal_init() and before first inference.
bool ggml_metal_turbo_register(struct ggml_backend_metal_device * device);

// Check if TurboQuant kernels are loaded and ready.
//
// Returns true when the kernels are loaded and ready, false otherwise.
bool ggml_metal_turbo_available(void);

// Get the Metal pipeline for a specific TurboQuant kernel.
//
// kernel: which TurboQuant kernel to look up.
//
// Returns NULL if kernel not loaded. The result is an untyped pointer so this
// header does not depend on Objective-C/Metal types.
// NOTE(review): presumably the pointer refers to a Metal compute pipeline
// state object owned by the backend — confirm the concrete type, ownership,
// and the behavior for GGML_METAL_TURBO_KERNEL_COUNT or other out-of-range
// values against the implementation.
void * ggml_metal_turbo_get_pipeline(enum ggml_metal_turbo_kernel kernel);

#ifdef __cplusplus
}
#endif

#endif // GGML_METAL_TURBO_H