Files
timmy-home/scripts/run_gemma4_turboquant_chat.sh

40 lines
1.0 KiB
Bash
Executable File

#!/usr/bin/env bash
# Launch an interactive Gemma 4 chat session using the TurboQuant llama.cpp
# fork with the turbo4 quantized KV cache. Downloads the GGUF model on first
# run via the Hugging Face `hf` CLI.
#
# Requirements:
#   - `hf` (huggingface_hub CLI) on PATH
#   - TurboQuant llama-cli built at $HOME/turboquant/llama-cpp-fork/build/bin
set -euo pipefail

readonly MODEL_DIR="$HOME/models/gemma4-e4b"
readonly MODEL_FILE="$MODEL_DIR/gemma-4-e4b-it-Q4_K_M.gguf"
readonly CLI="$HOME/turboquant/llama-cpp-fork/build/bin/llama-cli"

# Print an error to stderr and abort (diagnostics must not pollute stdout).
die() { printf '%s\n' "$*" >&2; exit 1; }

mkdir -p "$MODEL_DIR"

command -v hf >/dev/null 2>&1 \
  || die "hf CLI not found. Install huggingface_hub / hf first."

[[ -x "$CLI" ]] || die "TurboQuant llama-cli not found at: $CLI"

if [[ ! -f "$MODEL_FILE" ]]; then
  echo "[Gemma4-TurboQuant] Gemma 4 GGUF not found locally."
  echo "[Gemma4-TurboQuant] Downloading ggml-org/gemma-4-E4B-it-GGUF -> $MODEL_FILE"
  hf download ggml-org/gemma-4-E4B-it-GGUF gemma-4-e4b-it-Q4_K_M.gguf --local-dir "$MODEL_DIR"
  # `set -e` aborts on a failed download, but also verify the file actually
  # landed at the expected path in case the CLI changes its output layout.
  [[ -f "$MODEL_FILE" ]] || die "Download finished but $MODEL_FILE is missing."
fi

echo
printf '[Gemma4-TurboQuant] Model: %s\n' "$MODEL_FILE"
printf '[Gemma4-TurboQuant] CLI: %s\n' "$CLI"
echo '[Gemma4-TurboQuant] Starting interactive chat with turbo4 KV cache...'
echo '[Gemma4-TurboQuant] Press Ctrl+C to exit.'
echo

# Replace the shell with llama-cli so signals (Ctrl+C) go straight to it.
exec "$CLI" \
  -m "$MODEL_FILE" \
  -ngl 99 \
  -ctk turbo4 \
  -ctv turbo4 \
  -c 8192 \
  -cnv