From 0d427b69d3b014052aa9920defcf2877ea52c0f9 Mon Sep 17 00:00:00 2001
From: Alexander Whitestone
Date: Thu, 16 Apr 2026 01:51:06 +0000
Subject: [PATCH] feat: add ansible/roles/turboquant-deploy/templates/turboquant.service.j2 for TurboQuant deployment

---
 .../templates/turboquant.service.j2           | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 ansible/roles/turboquant-deploy/templates/turboquant.service.j2

diff --git a/ansible/roles/turboquant-deploy/templates/turboquant.service.j2 b/ansible/roles/turboquant-deploy/templates/turboquant.service.j2
new file mode 100644
index 00000000..e365224a
--- /dev/null
+++ b/ansible/roles/turboquant-deploy/templates/turboquant.service.j2
@@ -0,0 +1,24 @@
+# TurboQuant llama-server unit; rendered by the turboquant-deploy Ansible role.
+[Unit]
+Description=TurboQuant Gemma 4 Inference Server
+After=network.target
+
+[Service]
+Type=simple
+# NOTE(review): falls back to root when turboquant_user is unset; prefer an unprivileged account.
+User={{ turboquant_user | default('root') }}
+WorkingDirectory={{ turboquant_install_path | default('/opt/llama-cpp-turboquant') }}
+Environment=TURBO_LAYER_ADAPTIVE={{ turboquant_layer_mode | default(7) }}
+# ARM64-only tuning. Linux facts report 64-bit ARM as 'aarch64' and macOS as 'arm64', so both are matched.
+{% if ansible_architecture in ['arm64', 'aarch64'] %}
+Environment=GGML_METAL_DEBUG=0
+Environment=OMP_NUM_THREADS={{ ansible_processor_vcpus }}
+{% endif %}
+# model_base_path and llama_cpp_port have no default here and must be defined by the caller.
+# --host 0.0.0.0 makes the server listen on all interfaces.
+ExecStart={{ turboquant_install_path | default('/opt/llama-cpp-turboquant') }}/build/bin/llama-server -m {{ model_base_path }}/gemma4-turboquant/{{ gemma4_model_filename | default('gemma-4-q4_k_m.gguf') }} --port {{ llama_cpp_port }} -ctk {{ turboquant_kv_type | default('turbo4') }} -ctv {{ turboquant_kv_type | default('turbo4') }} -c {{ max_context_tokens | default(131072) }} --host 0.0.0.0
+Restart=always
+RestartSec=10
+
+[Install]
+WantedBy=multi-user.target