Closes #1123. Implements all three phases of the local LLM standardization: PHASE 1 — Deployment: - docs/local-llm.md: full deployment guide (build, model download, health check, model path convention /opt/models/llama/, hardware recommendations) - systemd/llama-server.service: hardened unit with resource limits and auto-restart - Health check: /health endpoint + model loaded verification PHASE 2 — Hermes Integration: - bin/llama_client.py: OpenAI-compatible Python client wrapping llama.cpp HTTP API (chat completions, streaming, raw completions, health check, model listing, benchmarking, full CLI interface) - nexus/llama_provider.py: Hermes inference router provider adapter - Activates when external APIs fail, LOCAL_ONLY=true, or explicit local request - Response format normalized to OpenAI-compatible chat completions - Token usage estimated and logged - Health caching with TTL for efficiency PHASE 3 — Optimization & Ops: - Benchmarking: client.benchmark() + CLI benchmark command - Quantization guide: Q4_K_M recommended for fleet, Q6_K for high-RAM, Q3_K for low - Model recommendations for VPS Beta (3B), VPS Alpha (7B), Mac (7B Q6_K) - Night watch integration: health probe script with auto-restart Fleet standard model: Qwen2.5-7B-Instruct-Q4_K_M.gguf Default endpoint: http://localhost:11435 22 tests pass.
52 lines
1015 B
Desktop File
(duplicate file-viewer metadata removed)
# llama-server.service — systemd unit for the llama.cpp local LLM server.
# Serves the fleet-standard model over an OpenAI-compatible HTTP API.
# Install to /etc/systemd/system/, then: systemctl enable --now llama-server

[Unit]
Description=llama.cpp Local LLM Server
# Start only after the network is fully online, since we bind a listen socket.
After=network-online.target
Wants=network-online.target
# Rate-limit restarts: at most 3 start attempts within any 60 s window.
# NOTE: StartLimit* are [Unit] options since systemd 229; placing them in
# [Service] only works via a legacy compatibility shim.
StartLimitBurst=3
StartLimitIntervalSec=60

[Service]
Type=simple
# NOTE(review): runs as root despite the hardening block below. A dedicated
# unprivileged user with read access to /opt/models would be safer — confirm
# model-directory permissions before changing.
User=root
Group=root

# Model and server configuration
Environment=MODEL_PATH=/opt/models/llama/Qwen2.5-7B-Instruct-Q4_K_M.gguf
# NOTE(review): 0.0.0.0 binds all interfaces and exposes the API to the
# network; use 127.0.0.1 if only local clients should reach it.
Environment=LLAMA_HOST=0.0.0.0
Environment=LLAMA_PORT=11435
# Context window size in tokens.
Environment=LLAMA_CTX_SIZE=4096
# CPU threads used for inference.
Environment=LLAMA_THREADS=4

ExecStart=/usr/local/bin/llama-server \
    -m ${MODEL_PATH} \
    --host ${LLAMA_HOST} \
    --port ${LLAMA_PORT} \
    -c ${LLAMA_CTX_SIZE} \
    -t ${LLAMA_THREADS} \
    --cont-batching

# Auto-restart on crash, waiting 10 s between attempts (bounded by the
# StartLimit* settings in [Unit]).
Restart=on-failure
RestartSec=10

# Resource limits: cap memory at 12 GiB and CPU at 90% of one core-second
# per second aggregate, so inference cannot starve the host.
MemoryMax=12G
CPUQuota=90%

# Security hardening
NoNewPrivileges=true
# Mount /usr, /boot, /etc (and more) read-only; whitelist writable paths below.
ProtectSystem=strict
ProtectHome=read-only
# NOTE(review): the server only needs to READ the model; keep write access
# only if model-download tooling runs inside this unit, otherwise tighten
# to ReadOnlyPaths=/opt/models.
ReadWritePaths=/opt/models
PrivateTmp=true
ProtectKernelTunables=true
ProtectControlGroups=true
RestrictSUIDSGID=true

# Logging: both streams go to the journal under one identifier, so
# `journalctl -u llama-server` / `-t llama-server` captures everything.
StandardOutput=journal
StandardError=journal
SyslogIdentifier=llama-server

[Install]
WantedBy=multi-user.target