# llama-server.service — runs the llama.cpp HTTP inference server as a system service.
# Install to /etc/systemd/system/llama-server.service, then:
#   systemctl daemon-reload && systemctl enable --now llama-server
# Logs: journalctl -u llama-server (SyslogIdentifier=llama-server)

[Unit]
Description=llama.cpp Local LLM Server
# Server binds 0.0.0.0, so wait for full network connectivity, not just
# network device presence. Wants= pulls the target in; After= orders us behind it.
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# NOTE(review): runs as root. The hardening below (NoNewPrivileges,
# ProtectSystem, ProtectHome) is far less effective for root — consider a
# dedicated unprivileged user that can read /opt/models.
User=root
# Model and server tuning. Referenced via ${...} in ExecStart below.
Environment=MODEL_PATH=/opt/models/llama/Qwen2.5-7B-Instruct-Q4_K_M.gguf
Environment=LLAMA_HOST=0.0.0.0
Environment=LLAMA_PORT=11435
# context window in tokens
Environment=LLAMA_CTX_SIZE=4096
# CPU threads for inference
Environment=LLAMA_THREADS=4
# ${VAR} expands each Environment= variable to a single argument (systemd.service(5)).
ExecStart=/usr/local/bin/llama-server -m ${MODEL_PATH} --host ${LLAMA_HOST} --port ${LLAMA_PORT} -c ${LLAMA_CTX_SIZE} -t ${LLAMA_THREADS} --cont-batching
Restart=on-failure
# seconds to wait before a restart attempt
RestartSec=10
# Resource caps (systemd.resource-control(5)): hard memory ceiling and 90% of one CPU-second per second.
MemoryMax=12G
CPUQuota=90%
# Sandboxing (systemd.exec(5)). ProtectSystem=strict mounts the whole file
# system read-only for the service, so the model directory must be whitelisted.
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=read-only
ReadWritePaths=/opt/models
PrivateTmp=true
StandardOutput=journal
SyslogIdentifier=llama-server

[Install]
WantedBy=multi-user.target