feat: llama.cpp integration branch for Metal shaders (#75)

This commit is contained in:
2026-04-16 02:11:55 +00:00
parent 5428aae776
commit ef2b801b9e

44
ggml-metal-turbo.h Normal file
View File

@@ -0,0 +1,44 @@
// ggml-metal-turbo.h — TurboQuant Metal kernel registration
// Integrates ggml-metal-turbo.metal kernels into llama.cpp's Metal backend
//
// Usage: Call ggml_metal_turbo_register(device) after ggml_metal_init()
// to load and register TurboQuant kernels with the Metal backend.
#ifndef GGML_METAL_TURBO_H
#define GGML_METAL_TURBO_H
#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
#endif
// Opaque forward declarations matching ggml-metal internals.
// This header never needs the layout of these types, so only the struct tags
// are declared here.
struct ggml_backend_metal_device;
// NOTE(review): no declaration in this header references ggml_metal_context;
// confirm whether it is still needed or whether a context parameter was intended.
struct ggml_metal_context;
// Indices identifying each TurboQuant kernel (registered in the ggml-metal
// kernel array). Kernel indices are contiguous and start at zero, so the
// trailing GGML_METAL_TURBO_KERNEL_COUNT entry equals the number of kernels
// listed before it and is not itself a kernel.
enum ggml_metal_turbo_kernel {
    GGML_METAL_TURBO_KERNEL_FWHT_128         = 0, // presumably a 128-point fast Walsh-Hadamard transform — TODO confirm
    GGML_METAL_TURBO_KERNEL_TURBO4_DEQUANT   = 1,
    GGML_METAL_TURBO_KERNEL_ATTENTION_TURBO4 = 2,

    GGML_METAL_TURBO_KERNEL_COUNT // keep last: follows the final kernel index
};
// Register TurboQuant Metal kernels.
//
// Parameters:
//   device - Metal backend device; the type is opaque in this header.
//
// Returns true on success, false if Metal is unavailable or kernel
// compilation failed.
//
// Ordering: must be called after ggml_metal_init() and before the first
// inference.
bool ggml_metal_turbo_register(struct ggml_backend_metal_device * device);
// Check if TurboQuant kernels are loaded and ready.
// Takes no arguments; returns true when the kernels are loaded and ready,
// false otherwise.
// NOTE(review): presumably only true after a successful
// ggml_metal_turbo_register() call — confirm against the implementation.
bool ggml_metal_turbo_available(void);
// Get the Metal pipeline for a specific TurboQuant kernel.
//
// Parameters:
//   kernel - which TurboQuant kernel to look up.
//
// Returns the pipeline as an untyped pointer, or NULL if the kernel is not
// loaded.
// NOTE(review): presumably the pointer is an id<MTLComputePipelineState>
// exposed as void * to keep this header free of Objective-C types — confirm.
// NOTE(review): behavior for GGML_METAL_TURBO_KERNEL_COUNT or other
// out-of-range values is not specified here; verify in the implementation.
void * ggml_metal_turbo_get_pipeline(enum ggml_metal_turbo_kernel kernel);
#ifdef __cplusplus
}
#endif
#endif // GGML_METAL_TURBO_H