[Unit]
Description=llama.cpp inference server for Timmy
After=network.target

[Service]
Type=simple
User=root
WorkingDirectory=/root/timmy
ExecStart=/root/timmy/llama-server \
  -m /root/timmy/models/hermes-3-8b.Q4_K_M.gguf \
  --host 127.0.0.1 \
  --port 8081 \
  -c 8192 \
  -np 1 \
  --jinja \
  -ngl 0
Restart=always
RestartSec=10
Environment="HOME=/root"

[Install]
WantedBy=multi-user.target