feat: llama.cpp integration branch for Metal shaders (#75)
This commit is contained in:
44
ggml-metal-turbo.h
Normal file
44
ggml-metal-turbo.h
Normal file
@@ -0,0 +1,44 @@
|
||||
// ggml-metal-turbo.h — TurboQuant Metal kernel registration
|
||||
// Integrates ggml-metal-turbo.metal kernels into llama.cpp's Metal backend
|
||||
//
|
||||
// Usage: Call ggml_metal_turbo_register(device) after ggml_metal_init()
|
||||
// to load and register TurboQuant kernels with the Metal backend.
|
||||
|
||||
#ifndef GGML_METAL_TURBO_H
|
||||
#define GGML_METAL_TURBO_H
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Opaque forward declarations matching ggml-metal internals
|
||||
struct ggml_backend_metal_device;
|
||||
struct ggml_metal_context;
|
||||
|
||||
// Identifiers for the TurboQuant kernels (registered in the ggml-metal kernel array).
// Values are contiguous and start at 0, so GGML_METAL_TURBO_KERNEL_COUNT equals
// the number of kernel identifiers listed before it.
enum ggml_metal_turbo_kernel {
    GGML_METAL_TURBO_KERNEL_FWHT_128 = 0,
    GGML_METAL_TURBO_KERNEL_TURBO4_DEQUANT,
    GGML_METAL_TURBO_KERNEL_ATTENTION_TURBO4,

    // Sentinel: keep as the final enumerator so it keeps tracking the kernel count.
    GGML_METAL_TURBO_KERNEL_COUNT
};
|
||||
|
||||
// Load and register the TurboQuant Metal kernels.
//
// device: opaque handle to the ggml Metal backend device.
//         NOTE(review): whether a NULL device is tolerated is not visible from
//         this header — confirm against the implementation.
//
// Returns true when registration succeeded; false when Metal is unavailable or
// kernel compilation failed.
//
// Call order: after ggml_metal_init() and before the first inference.
bool ggml_metal_turbo_register(struct ggml_backend_metal_device * device);
|
||||
|
||||
// Query whether the TurboQuant kernels are loaded and ready for use.
// Returns true if they are, false otherwise.
bool ggml_metal_turbo_available(void);
|
||||
|
||||
// Look up the Metal pipeline associated with one TurboQuant kernel.
//
// kernel: which kernel's pipeline to fetch.
//         NOTE(review): behavior for GGML_METAL_TURBO_KERNEL_COUNT or other
//         out-of-range values is not specified here — confirm in the implementation.
//
// Returns an opaque pipeline pointer, or NULL if the kernel is not loaded.
// NOTE(review): the concrete type behind the void * is presumably a Metal
// compute pipeline state object — confirm before casting.
void * ggml_metal_turbo_get_pipeline(enum ggml_metal_turbo_kernel kernel);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // GGML_METAL_TURBO_H
|
||||
Reference in New Issue
Block a user