feat: llama.cpp integration branch for Metal shaders (#75)

2026-04-16 02:11:55 +00:00
parent 5428aae776
commit ef2b801b9e
1 changed files with 44 additions and 0 deletions
--- a/ggml-metal-turbo.h
+++ b/ggml-metal-turbo.h
@@ -0,0 +1,44 @@
+// ggml-metal-turbo.h — TurboQuant Metal kernel registration
+// Integrates ggml-metal-turbo.metal kernels into llama.cpp's Metal backend
+//
+// Usage: Call ggml_metal_turbo_register(device) after ggml_metal_init()
+//        to load and register TurboQuant kernels with the Metal backend.
+
+#ifndef GGML_METAL_TURBO_H
+#define GGML_METAL_TURBO_H
+
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Opaque forward declarations matching ggml-metal internals (declared only, never defined in this header)
+struct ggml_backend_metal_device; // pointer to this type is the parameter of ggml_metal_turbo_register()
+struct ggml_metal_context;        // NOTE(review): not referenced by any declaration in this header — confirm it is still needed
+
+// TurboQuant kernel indices (registered in ggml-metal kernel array); contiguous from 0, also the argument type of ggml_metal_turbo_get_pipeline()
+enum ggml_metal_turbo_kernel {
+    GGML_METAL_TURBO_KERNEL_FWHT_128 = 0,     // 0: explicit start so the remaining values follow as 1, 2, 3
+    GGML_METAL_TURBO_KERNEL_TURBO4_DEQUANT,   // 1
+    GGML_METAL_TURBO_KERNEL_ATTENTION_TURBO4, // 2
+    GGML_METAL_TURBO_KERNEL_COUNT             // 3: equals the number of kernel entries above; must stay the last enumerator
+};
+
+// Register TurboQuant Metal kernels; `device` is an opaque handle to the ggml Metal backend device.
+// Returns true on success, false if Metal is unavailable or kernel compilation failed.
+// Must be called after ggml_metal_init() and before the first inference.
+bool ggml_metal_turbo_register(struct ggml_backend_metal_device * device); // NOTE(review): behavior for a NULL `device` is not specified here — confirm
+
+// Check if TurboQuant kernels are loaded and ready. NOTE(review): presumably true only after a successful ggml_metal_turbo_register() — confirm in the implementation.
+bool ggml_metal_turbo_available(void);
+
+// Get the Metal pipeline for a specific TurboQuant kernel, returned as an untyped pointer.
+// Returns NULL if the kernel is not loaded. NOTE(review): presumably a Metal compute pipeline state object, and GGML_METAL_TURBO_KERNEL_COUNT is presumably not a valid argument — confirm.
+void * ggml_metal_turbo_get_pipeline(enum ggml_metal_turbo_kernel kernel);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // GGML_METAL_TURBO_H