Files
timmy-home/scripts/run_gemma4_llamacpp_server.sh

47 lines
1.2 KiB
Bash
Executable File

#!/usr/bin/env bash
# Download the Gemma 4 GGUF model (one-time, if missing) and launch
# llama-server with the turbo4 KV cache on an OpenAI-compatible endpoint.
#
# Requirements:
#   - `hf` CLI (huggingface_hub) on PATH, for the initial model download
#   - llama-server built at $SERVER_BIN
#
# Environment overrides: HOST, PORT (defaults below).
set -euo pipefail

# --- Configuration ---
readonly MODEL_DIR="$HOME/models/gemma4-llamacpp"
readonly MODEL_FILE="$MODEL_DIR/gemma-4-e4b-it-Q4_K_M.gguf"
readonly REPO_ID="ggml-org/gemma-4-E4B-it-GGUF"
readonly FILE_NAME="gemma-4-e4b-it-Q4_K_M.gguf"
readonly SERVER_BIN="$HOME/turboquant/llama-cpp-fork/build/bin/llama-server"
PORT="${PORT:-8081}"
HOST="${HOST:-127.0.0.1}"

# Print an error to stderr and exit non-zero.
die() { printf '%s\n' "$*" >&2; exit 1; }

mkdir -p "$MODEL_DIR"

# --- Preflight checks ---
command -v hf >/dev/null 2>&1 \
  || die "hf CLI not found. Install huggingface_hub first."
[ -x "$SERVER_BIN" ] \
  || die "llama-server not found at: $SERVER_BIN"

# --- One-time model download ---
if [ ! -f "$MODEL_FILE" ]; then
  echo "[Gemma4-llama.cpp] Downloading real Gemma 4 GGUF from $REPO_ID"
  echo "[Gemma4-llama.cpp] Target: $MODEL_FILE"
  hf download "$REPO_ID" "$FILE_NAME" --local-dir "$MODEL_DIR"
  # Verify the download actually landed at the path the server will load;
  # otherwise fail here with a clear message instead of inside llama-server.
  [ -f "$MODEL_FILE" ] \
    || die "Download completed but model file is missing: $MODEL_FILE"
fi

# --- Startup banner ---
echo
printf '[Gemma4-llama.cpp] Model: %s\n' "$MODEL_FILE"
printf '[Gemma4-llama.cpp] Server: %s\n' "$SERVER_BIN"
printf '[Gemma4-llama.cpp] Listen: http://%s:%s/v1\n' "$HOST" "$PORT"
echo '[Gemma4-llama.cpp] Starting llama-server with turbo4 KV cache...'
echo

# NOTE(review): presumably consumed by the forked llama.cpp build — confirm
# the variable name against the turboquant fork.
export TURBO_LAYER_ADAPTIVE=7

# Replace this shell with the server process so signals reach it directly.
exec "$SERVER_BIN" \
  --jinja \
  -m "$MODEL_FILE" \
  --host "$HOST" \
  --port "$PORT" \
  -ngl 99 \
  -c 8192 \
  -ctk turbo4 \
  -ctv turbo4