From ef2b801b9e7fdfdb6e3fbf59ba3b68a3324c88f9 Mon Sep 17 00:00:00 2001
From: Alexander Whitestone
Date: Thu, 16 Apr 2026 02:11:55 +0000
Subject: [PATCH] feat: llama.cpp integration branch for Metal shaders (#75)

---
 ggml-metal-turbo.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 ggml-metal-turbo.h

diff --git a/ggml-metal-turbo.h b/ggml-metal-turbo.h
new file mode 100644
index 0000000..e92a11f
--- /dev/null
+++ b/ggml-metal-turbo.h
@@ -0,0 +1,44 @@
+// ggml-metal-turbo.h — TurboQuant Metal kernel registration
+// Integrates ggml-metal-turbo.metal kernels into llama.cpp's Metal backend
+//
+// Usage: Call ggml_metal_turbo_register(device) after ggml_metal_init()
+// to load and register TurboQuant kernels with the Metal backend.
+
+#ifndef GGML_METAL_TURBO_H
+#define GGML_METAL_TURBO_H
+
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Opaque forward declarations matching ggml-metal internals
+struct ggml_backend_metal_device;
+struct ggml_metal_context;
+
+// TurboQuant kernel indices (registered in ggml-metal kernel array)
+enum ggml_metal_turbo_kernel {
+    GGML_METAL_TURBO_KERNEL_FWHT_128 = 0,
+    GGML_METAL_TURBO_KERNEL_TURBO4_DEQUANT,
+    GGML_METAL_TURBO_KERNEL_ATTENTION_TURBO4,
+    GGML_METAL_TURBO_KERNEL_COUNT
+};
+
+// Register TurboQuant Metal kernels.
+// Returns true on success, false if Metal unavailable or compilation failed.
+// Must be called after ggml_metal_init() and before first inference.
+bool ggml_metal_turbo_register(struct ggml_backend_metal_device * device);
+
+// Check if TurboQuant kernels are loaded and ready.
+bool ggml_metal_turbo_available(void);
+
+// Get the Metal pipeline for a specific TurboQuant kernel.
+// Returns NULL if kernel not loaded.
+void * ggml_metal_turbo_get_pipeline(enum ggml_metal_turbo_kernel kernel);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GGML_METAL_TURBO_H