From f1a699a3fcfd9afd9754f33be5b91eba73762629 Mon Sep 17 00:00:00 2001
From: Alexander Whitestone
Date: Thu, 16 Apr 2026 01:51:02 +0000
Subject: [PATCH] feat: add ansible/roles/turboquant-deploy/templates/server_config.yml.j2 for TurboQuant deployment

---
Review note: the template body below was revised after this patch was
generated, so the blob id on the "index" line predates the added content.

 .../templates/server_config.yml.j2            | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 ansible/roles/turboquant-deploy/templates/server_config.yml.j2

diff --git a/ansible/roles/turboquant-deploy/templates/server_config.yml.j2 b/ansible/roles/turboquant-deploy/templates/server_config.yml.j2
new file mode 100644
index 00000000..e15098b7
--- /dev/null
+++ b/ansible/roles/turboquant-deploy/templates/server_config.yml.j2
@@ -0,0 +1,37 @@
+{#
+  Variables read by this template.
+  No fallback in this template (must be defined elsewhere, e.g. role defaults or inventory):
+    llama_cpp_port, model_base_path
+  Optional, with the fallback given in the expression that uses each one:
+    llama_cpp_host, gemma4_model_filename, turboquant_kv_type,
+    turboquant_layer_mode, max_context_tokens, batch_size,
+    temperature, top_p, top_k
+#}
+# TurboQuant Server Configuration
+# Generated by Ansible for {{ inventory_hostname }}
+
+server:
+  # Listen address. The fallback 0.0.0.0 binds every IPv4 interface; set
+  # llama_cpp_host (e.g. 127.0.0.1) to restrict exposure.
+  host: "{{ llama_cpp_host | default('0.0.0.0') }}"
+  port: {{ llama_cpp_port }}
+  model_path: "{{ model_base_path }}/gemma4-turboquant/{{ gemma4_model_filename | default('gemma-4-q4_k_m.gguf') }}"
+
+turboquant:
+  enabled: true
+  kv_type: "{{ turboquant_kv_type | default('turbo4') }}"
+  layer_adaptive_mode: {{ turboquant_layer_mode | default(7) }}
+
+context:
+  max_tokens: {{ max_context_tokens | default(131072) }}
+  batch_size: {{ batch_size | default(512) }}
+
+generation:
+  temperature: {{ temperature | default(0.7) }}
+  top_p: {{ top_p | default(0.9) }}
+  top_k: {{ top_k | default(40) }}
+
+environment:
+  # Same variable as turboquant.layer_adaptive_mode, quoted so it renders as a
+  # string rather than an integer.
+  TURBO_LAYER_ADAPTIVE: "{{ turboquant_layer_mode | default(7) }}"