47 lines
1.2 KiB
Bash
Executable File
47 lines
1.2 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Download the Gemma 4 GGUF model (one-time, if missing) and launch
# llama-server with the turbo4 KV cache on a local OpenAI-compatible
# endpoint (http://127.0.0.1:8081/v1).
#
# Requirements:
#   - `hf` CLI (huggingface_hub) on PATH, for the initial model download
#   - a built `llama-server` binary at $SERVER_BIN

set -euo pipefail

readonly MODEL_DIR="$HOME/models/gemma4-llamacpp"
readonly REPO_ID="ggml-org/gemma-4-E4B-it-GGUF"
readonly FILE_NAME="gemma-4-e4b-it-Q4_K_M.gguf"
# Derived from FILE_NAME so the two can never drift apart.
readonly MODEL_FILE="$MODEL_DIR/$FILE_NAME"
readonly SERVER_BIN="$HOME/turboquant/llama-cpp-fork/build/bin/llama-server"
readonly PORT="8081"
readonly HOST="127.0.0.1"

# Print a diagnostic to stderr and abort.
die() { printf '%s\n' "$*" >&2; exit 1; }

mkdir -p "$MODEL_DIR"

# Preflight: fail early, with diagnostics on stderr.
command -v hf >/dev/null 2>&1 \
  || die "hf CLI not found. Install huggingface_hub first."

[[ -x "$SERVER_BIN" ]] || die "llama-server not found at: $SERVER_BIN"

# One-time download of the quantized weights.
if [[ ! -f "$MODEL_FILE" ]]; then
  echo "[Gemma4-llama.cpp] Downloading real Gemma 4 GGUF from $REPO_ID"
  echo "[Gemma4-llama.cpp] Target: $MODEL_FILE"
  hf download "$REPO_ID" "$FILE_NAME" --local-dir "$MODEL_DIR" \
    || die "Download failed: $REPO_ID/$FILE_NAME"
  # hf places files under --local-dir; confirm it landed where we expect
  # instead of discovering the mismatch when llama-server fails to load.
  [[ -f "$MODEL_FILE" ]] \
    || die "Download finished but $MODEL_FILE is missing"
fi

echo
printf '[Gemma4-llama.cpp] Model: %s\n' "$MODEL_FILE"
printf '[Gemma4-llama.cpp] Server: %s\n' "$SERVER_BIN"
printf '[Gemma4-llama.cpp] Listen: http://%s:%s/v1\n' "$HOST" "$PORT"
echo '[Gemma4-llama.cpp] Starting llama-server with turbo4 KV cache...'
echo

export TURBO_LAYER_ADAPTIVE=7

# exec replaces this shell so signals (SIGTERM/SIGINT) reach the server
# directly and no orphan wrapper process lingers.
exec "$SERVER_BIN" \
  --jinja \
  -m "$MODEL_FILE" \
  --host "$HOST" \
  --port "$PORT" \
  -ngl 99 \
  -c 8192 \
  -ctk turbo4 \
  -ctv turbo4