#!/usr/bin/env bash
set -euo pipefail

MODEL_DIR="$HOME/models/gemma4-e4b"
MODEL_FILE="$MODEL_DIR/gemma-4-e4b-it-Q4_K_M.gguf"
CLI="$HOME/turboquant/llama-cpp-fork/build/bin/llama-cli"

mkdir -p "$MODEL_DIR"

# Require the Hugging Face CLI for downloading the model.
if ! command -v hf >/dev/null 2>&1; then
  echo "hf CLI not found. Install huggingface_hub / hf first."
  exit 1
fi

# Require the TurboQuant fork's llama-cli binary.
if [ ! -x "$CLI" ]; then
  echo "TurboQuant llama-cli not found at: $CLI"
  exit 1
fi

# Download the GGUF model only if it is not already present locally.
if [ ! -f "$MODEL_FILE" ]; then
  echo "[Gemma4-TurboQuant] Gemma 4 GGUF not found locally."
  echo "[Gemma4-TurboQuant] Downloading ggml-org/gemma-4-E4B-it-GGUF -> $MODEL_FILE"
  hf download ggml-org/gemma-4-E4B-it-GGUF gemma-4-e4b-it-Q4_K_M.gguf --local-dir "$MODEL_DIR"
fi

echo
printf '[Gemma4-TurboQuant] Model: %s\n' "$MODEL_FILE"
printf '[Gemma4-TurboQuant] CLI: %s\n' "$CLI"
echo '[Gemma4-TurboQuant] Starting interactive chat with turbo4 KV cache...'
echo '[Gemma4-TurboQuant] Press Ctrl+C to exit.'
echo

# Launch an interactive chat session with the turbo4 KV-cache type on both
# the key and value caches, all layers offloaded to the GPU.
exec "$CLI" \
  -m "$MODEL_FILE" \
  -ngl 99 \
  -ctk turbo4 \
  -ctv turbo4 \
  -c 8192 \
  -cnv