# the-nexus/systemd/llama-server.service

[Unit]
Description=llama.cpp Local LLM Server
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User=root
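# Model and server configuration; values below can be overridden per-host with a drop-in (systemctl edit llama-server).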
Environment=MODEL_PATH=/opt/models/llama/Qwen2.5-7B-Instruct-Q4_K_M.gguf
Environment=LLAMA_HOST=0.0.0.0
Environment=LLAMA_PORT=11435
Environment=LLAMA_CTX_SIZE=4096
Environment=LLAMA_THREADS=4
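# systemd expands ${VAR} from the Environment= lines above, each as a single argument.
# --cont-batching enables llama-server's continuous batching for concurrent requests.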
ExecStart=/usr/local/bin/llama-server -m ${MODEL_PATH} --host ${LLAMA_HOST} --port ${LLAMA_PORT} -c ${LLAMA_CTX_SIZE} -t ${LLAMA_THREADS} --cont-batching
Restart=on-failure
RestartSec=10
MemoryMax=12G
CPUQuota=90%
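# Sandboxing: filesystem read-only except /opt/models, no privilege escalation, private /tmp.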
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=read-only
ReadWritePaths=/opt/models
PrivateTmp=true
StandardOutput=journal
SyslogIdentifier=llama-server

[Install]
WantedBy=multi-user.target
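
# Usage sketch (assumes the GGUF model already exists at MODEL_PATH):
#   systemctl daemon-reload
#   systemctl enable --now llama-server
#   curl http://127.0.0.1:11435/health
# llama-server serves an OpenAI-compatible HTTP API on LLAMA_PORT once the model is loaded.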