feat: standardize llama.cpp backend (#1123)

This commit is contained in:
2026-04-14 01:42:37 +00:00
committed by Alexander Whitestone
parent a0443a7003
commit 48f85da0c0

View File

@@ -0,0 +1,29 @@
[Unit]
Description=llama.cpp Local LLM Server
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
User=root
Environment=MODEL_PATH=/opt/models/llama/Qwen2.5-7B-Instruct-Q4_K_M.gguf
Environment=LLAMA_HOST=0.0.0.0
Environment=LLAMA_PORT=11435
Environment=LLAMA_CTX_SIZE=4096
Environment=LLAMA_THREADS=4
ExecStart=/usr/local/bin/llama-server -m ${MODEL_PATH} --host ${LLAMA_HOST} --port ${LLAMA_PORT} -c ${LLAMA_CTX_SIZE} -t ${LLAMA_THREADS} --cont-batching
Restart=on-failure
RestartSec=10
MemoryMax=12G
CPUQuota=90%
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=read-only
ReadWritePaths=/opt/models
PrivateTmp=true
StandardOutput=journal
StandardError=journal
SyslogIdentifier=llama-server
[Install]
WantedBy=multi-user.target