# llama-server.service — runs the llama.cpp HTTP inference server as a system service.
# Install to /etc/systemd/system/llama-server.service, then:
#   systemctl daemon-reload && systemctl enable --now llama-server
# Logs: journalctl -u llama-server (SyslogIdentifier=llama-server)

[Unit]
Description=llama.cpp Local LLM Server
# Server binds 0.0.0.0, so wait for full network connectivity, not just
# network device presence. Wants= pulls the target in; After= orders us behind it.
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# NOTE(review): runs as root. The hardening below (NoNewPrivileges,
# ProtectSystem, ProtectHome) is far less effective for root — consider a
# dedicated unprivileged user that can read /opt/models.
User=root
# Model and server tuning. Referenced via ${...} in ExecStart below.
Environment=MODEL_PATH=/opt/models/llama/Qwen2.5-7B-Instruct-Q4_K_M.gguf
Environment=LLAMA_HOST=0.0.0.0
Environment=LLAMA_PORT=11435
# context window in tokens
Environment=LLAMA_CTX_SIZE=4096
# CPU threads for inference
Environment=LLAMA_THREADS=4
# ${VAR} expands each Environment= variable to a single argument (systemd.service(5)).
ExecStart=/usr/local/bin/llama-server -m ${MODEL_PATH} --host ${LLAMA_HOST} --port ${LLAMA_PORT} -c ${LLAMA_CTX_SIZE} -t ${LLAMA_THREADS} --cont-batching
Restart=on-failure
# seconds to wait before a restart attempt
RestartSec=10
# Resource caps (systemd.resource-control(5)): hard memory ceiling and 90% of one CPU-second per second.
MemoryMax=12G
CPUQuota=90%
# Sandboxing (systemd.exec(5)). ProtectSystem=strict mounts the whole file
# system read-only for the service, so the model directory must be whitelisted.
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=read-only
ReadWritePaths=/opt/models
PrivateTmp=true
StandardOutput=journal
SyslogIdentifier=llama-server

[Install]
WantedBy=multi-user.target