diff --git a/ansible/roles/turboquant-deploy/templates/server_config.yml.j2 b/ansible/roles/turboquant-deploy/templates/server_config.yml.j2
new file mode 100644
index 00000000..e15098b7
--- /dev/null
+++ b/ansible/roles/turboquant-deploy/templates/server_config.yml.j2
@@ -0,0 +1,30 @@
+# TurboQuant Server Configuration
+# Generated by Ansible for {{ inventory_hostname }}
+#
+# Variables filtered through default(...) are optional; llama_cpp_port and
+# model_base_path have no fallback in this template.
+
+server:
+  host: "0.0.0.0"
+  # No default here: rendering depends on llama_cpp_port being defined elsewhere.
+  port: {{ llama_cpp_port }}
+  model_path: "{{ model_base_path }}/gemma4-turboquant/{{ gemma4_model_filename | default('gemma-4-q4_k_m.gguf') }}"
+
+turboquant:
+  enabled: true
+  kv_type: "{{ turboquant_kv_type | default('turbo4') }}"
+  # Same variable and default as environment.TURBO_LAYER_ADAPTIVE; keep in sync.
+  layer_adaptive_mode: {{ turboquant_layer_mode | default(7) }}
+
+context:
+  max_tokens: {{ max_context_tokens | default(131072) }}
+  batch_size: {{ batch_size | default(512) }}
+
+generation:
+  temperature: {{ temperature | default(0.7) }}
+  top_p: {{ top_p | default(0.9) }}
+  top_k: {{ top_k | default(40) }}
+
+environment:
+  # Rendered as a quoted string, unlike turboquant.layer_adaptive_mode (bare scalar).
+  TURBO_LAYER_ADAPTIVE: "{{ turboquant_layer_mode | default(7) }}"