// llama.h — Stub header for reference integration build
//
// Minimal stand-ins for the llama.cpp types and constants that the
// TurboQuant QJL integration code compiles against. This is NOT a real
// API; every definition here exists only so the integration sources
// build in isolation.

#ifndef LLAMA_H
#define LLAMA_H

// Use the C standard headers rather than <cstddef>/<cstdint>: those are
// C++-only, and this header must compile from both C and C++ TUs.
#include <stddef.h>
#include <stdint.h>

// Opaque stand-in for the real llama context. The single char member
// keeps the struct non-empty — an empty struct is ill-formed in ISO C
// (it is a C++/GNU extension) — while sizeof stays 1 under C++ too.
struct llama_context {
    char opaque;
};

struct ggml_tensor; // forward declaration only; never dereferenced here

// Stub KV-cache record mirroring the fields the integration touches.
// Field meanings are inferred from names — confirm against the real
// llama.cpp definition before relying on them.
typedef struct llama_kv_cache {
    int n;               // presumably the number of cached entries — TODO confirm
    int d;               // presumably the per-entry dimension — TODO confirm
    void * data;         // raw cache storage (ownership not specified by this stub)
    int type;            // a GGML_TYPE_* value; int instead of enum to avoid ABI issues
    float * qjl_scales;  // QJL quantization scale factors (per-block, presumably)
    uint8_t * qjl_signs; // QJL packed sign bits
    float * qjl_proj;    // QJL projection matrix
} llama_kv_cache;

// Minimal ggml_type values needed for integration.
#define GGML_TYPE_F32 0
#define GGML_TYPE_F16 1
#define GGML_TYPE_Q4_0 2
#define GGML_TYPE_TURBOQUANT_QJL 0x103

#endif // LLAMA_H