# llama-server.service — runs llama.cpp's HTTP server for local LLM inference.
# Managed by systemd; see systemd.service(5), systemd.exec(5), systemd.resource-control(5).

[Unit]
Description=llama.cpp Local LLM Server
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# NOTE(review): running as root is unnecessary for a model server; consider a
# dedicated unprivileged user (or DynamicUser=yes) once /opt/models permissions allow it.
User=root
Environment=MODEL_PATH=/opt/models/llama/Qwen2.5-7B-Instruct-Q4_K_M.gguf
# 0.0.0.0 binds all interfaces — the server is reachable from the network.
Environment=LLAMA_HOST=0.0.0.0
Environment=LLAMA_PORT=11435
# Context window (tokens) and CPU thread count passed through to llama-server.
Environment=LLAMA_CTX_SIZE=4096
Environment=LLAMA_THREADS=4
# ${VAR} expands each Environment= value as a single argument (systemd.service(5)).
ExecStart=/usr/local/bin/llama-server -m ${MODEL_PATH} --host ${LLAMA_HOST} --port ${LLAMA_PORT} -c ${LLAMA_CTX_SIZE} -t ${LLAMA_THREADS} --cont-batching
Restart=on-failure
RestartSec=10

# Resource limits: cap RAM at 12G and CPU at 90% of one core-equivalent.
MemoryMax=12G
CPUQuota=90%

# Sandboxing / hardening.
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=read-only
# ProtectSystem=strict makes the filesystem read-only; re-allow writes to the model dir.
ReadWritePaths=/opt/models
PrivateTmp=true

StandardOutput=journal
SyslogIdentifier=llama-server

[Install]
WantedBy=multi-user.target