// turboquant/include/llama.h  (31 lines, 608 B, C)
//
// llama.h — Stub header for reference integration build
//
#ifndef LLAMA_H
#define LLAMA_H
#include <cstddef>
#include <cstdint>
/*
 * Stand-in for the real llama context object.  A struct with an empty
 * member list is a constraint violation in C prior to C23 (it only
 * compiles as a GNU extension, and its size differs between C and C++),
 * so carry a single placeholder member to keep the stub valid C11.
 */
struct llama_context {
    int stub_unused; /* placeholder only; real members live upstream */
};
struct ggml_tensor; /* forward declaration — defined by ggml proper */
/*
 * Stub KV-cache descriptor for the reference integration build.
 * NOTE(review): field layout is the ABI — do not reorder or retype members
 * (the `int type` choice below shows ABI stability is deliberate).
 */
typedef struct llama_kv_cache {
int n; /* presumably element/entry count — TODO confirm against real llama.h */
int d; /* presumably per-entry dimension — TODO confirm against real llama.h */
void * data; /* raw cache storage; ownership/allocation not shown here */
int type; // using int instead of enum to avoid ABI issues
float * qjl_scales; /* QJL quantization scales — semantics defined by TurboQuant, verify */
uint8_t * qjl_signs; /* QJL packed sign bits — presumably 1 bit per value, verify */
float * qjl_proj; /* QJL projection matrix — presumably Johnson-Lindenstrauss, verify */
} llama_kv_cache;
// Minimal ggml_type values needed for integration
// NOTE(review): 0/1/2 appear chosen to match upstream ggml's `enum ggml_type`
// numbering (F32 = 0, F16 = 1, Q4_0 = 2) — verify against the ggml version
// actually linked.  Kept as macros (not an enum) so consumers can test them
// in preprocessor conditionals.
#define GGML_TYPE_F32 0
#define GGML_TYPE_F16 1
#define GGML_TYPE_Q4_0 2
// 0x103 is presumably outside upstream ggml's type range to avoid collisions
// with future official type ids — confirm before upstreaming.
#define GGML_TYPE_TURBOQUANT_QJL 0x103
#endif // LLAMA_H