feat: add ansible/roles/turboquant-deploy/templates/turboquant.service.j2 for TurboQuant deployment
This commit is contained in:
@@ -0,0 +1,19 @@
|
||||
[Unit]
# systemd unit (rendered by Ansible from a Jinja2 template) that runs the
# TurboQuant build of llama-server.
Description=TurboQuant Gemma 4 Inference Server
After=network.target

[Service]
Type=simple
# NOTE(review): falls back to root when turboquant_user is not set. Consider a
# dedicated unprivileged account, since the server listens on all interfaces.
User={{ turboquant_user | default('root') }}
WorkingDirectory={{ turboquant_install_path | default('/opt/llama-cpp-turboquant') }}
# Exported to the server process; 7 is used when turboquant_layer_mode is unset.
Environment=TURBO_LAYER_ADAPTIVE={{ turboquant_layer_mode | default(7) }}
# The two settings below are emitted only for 64-bit ARM hosts. Linux reports
# that architecture as "aarch64" ("arm64" is the macOS spelling), so both names
# are accepted; matching "arm64" alone never fires on a systemd host.
# NOTE(review): GGML_METAL_DEBUG looks like a Metal (Apple GPU) setting and is
# presumably a no-op on Linux - confirm whether it is still wanted here.
{% if ansible_architecture in ['arm64', 'aarch64'] %}
Environment=GGML_METAL_DEBUG=0
Environment=OMP_NUM_THREADS={{ ansible_processor_vcpus }}
{% endif %}
# One logical command line; systemd joins backslash-continued lines with a
# space. model_base_path and llama_cpp_port have no fallback here and must be
# defined by the role or inventory. The same KV type is passed to both -ctk
# and -ctv, and the server binds to every interface (--host 0.0.0.0).
ExecStart={{ turboquant_install_path | default('/opt/llama-cpp-turboquant') }}/build/bin/llama-server \
    -m {{ model_base_path }}/gemma4-turboquant/{{ gemma4_model_filename | default('gemma-4-q4_k_m.gguf') }} \
    --port {{ llama_cpp_port }} \
    -ctk {{ turboquant_kv_type | default('turbo4') }} \
    -ctv {{ turboquant_kv_type | default('turbo4') }} \
    -c {{ max_context_tokens | default(131072) }} \
    --host 0.0.0.0
# Restart the server whenever it exits, waiting 10 seconds between attempts.
Restart=always
RestartSec=10

[Install]
# Start the service as part of the normal multi-user boot target.
WantedBy=multi-user.target
|
||||
Reference in New Issue
Block a user