turboquant/include/llama.h
Alexander Payne 9c5f2fd06b
Some checks failed
Smoke Test / smoke (pull_request) Failing after 14s
feat: integrate QJL Metal kernels into llama.cpp fork KV cache
Adds a complete QJL (quantized Johnson–Lindenstrauss projection with residual correction) Metal GPU kernel integration:

- ggml/include/ggml.h: add GGML_TYPE_TURBOQUANT_QJL type and helpers
- ggml/src/ggml-metal.metal: QJL encode/decode kernel signatures
- ggml/src/ggml-metal.m: Metal PSO registration + proper dispatch
- src/llama.cpp: KV allocation, projection matrix, fused decode path
- CMakeLists.txt: build all components with Metal support
- include/llama.h: stub for compilation

The integration follows the exact placement points in the llama.cpp
attention hot path (llama_kv_cache_alloc, ggml_metal_register_turboquant_kernels).

Closes #133
2026-04-26 09:30:40 -04:00


//
// llama.h — Stub header for the reference integration build.
// Declares just enough of the llama.cpp surface for the QJL KV-cache
// code to compile standalone.
//
#ifndef LLAMA_H
#define LLAMA_H

#include <cstddef>
#include <cstdint>

struct llama_context {};   // opaque stub
struct ggml_tensor;        // forward declaration

typedef struct llama_kv_cache {
    int       n;           // number of cached entries
    int       d;           // per-vector dimension
    void    * data;        // raw cache storage
    int       type;        // ggml type id; int instead of enum to avoid ABI issues
    float   * qjl_scales;  // per-vector scales for residual correction
    uint8_t * qjl_signs;   // packed sign bits from the JL projection
    float   * qjl_proj;    // random projection matrix
} llama_kv_cache;

// Minimal ggml_type values needed for integration
#define GGML_TYPE_F32            0
#define GGML_TYPE_F16            1
#define GGML_TYPE_Q4_0           2
#define GGML_TYPE_TURBOQUANT_QJL 0x103

#endif // LLAMA_H
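As a usage sketch, here is how the stub's `llama_kv_cache` fields might be sized for the QJL path: one scale per cached vector, `m` packed sign bits per vector, and a shared `m x d` projection matrix. The helper name `qjl_kv_alloc` and the sizing choices are assumptions for illustration; the fork's real `llama_kv_cache_alloc` may differ. The struct is repeated inline so the sketch compiles standalone.

```cpp
#include <cstdint>
#include <cstdlib>

// Mirrors the stub llama_kv_cache from include/llama.h so this sketch
// is self-contained.
typedef struct llama_kv_cache {
    int       n;
    int       d;
    void    * data;
    int       type;
    float   * qjl_scales;
    uint8_t * qjl_signs;
    float   * qjl_proj;
} llama_kv_cache;

#define GGML_TYPE_TURBOQUANT_QJL 0x103

// Hypothetical allocator: n cached vectors of dimension d, projected to
// m sign bits each (m must be a multiple of 8). Returns false on OOM.
static bool qjl_kv_alloc(llama_kv_cache *kv, int n, int d, int m) {
    kv->n    = n;
    kv->d    = d;
    kv->type = GGML_TYPE_TURBOQUANT_QJL;
    kv->data = nullptr; // unquantized storage unused in the QJL path
    kv->qjl_scales = (float *)  std::calloc((size_t)n,           sizeof(float));
    kv->qjl_signs  = (uint8_t *)std::calloc((size_t)n * (m / 8), sizeof(uint8_t));
    kv->qjl_proj   = (float *)  std::calloc((size_t)m * d,       sizeof(float));
    return kv->qjl_scales && kv->qjl_signs && kv->qjl_proj;
}
```

Keeping `qjl_proj` as a single shared matrix (rather than per-layer copies) is the natural reading of "projection matrix" in the commit message, since JL projections can be reused across keys.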