feat: add ansible/roles/turboquant-deploy/templates/server_config.yml.j2 for TurboQuant deployment
This commit is contained in:
@@ -0,0 +1,24 @@
# TurboQuant Server Configuration
# Generated by Ansible for {{ inventory_hostname }}

# Listener settings and the model file path.
server:
  host: "0.0.0.0"
  # No default filter is applied to llama_cpp_port; it has to be supplied by the inventory/vars.
  port: {{ llama_cpp_port }}
  model_path: "{{ model_base_path }}/gemma4-turboquant/{{ gemma4_model_filename | default('gemma-4-q4_k_m.gguf') }}"

# TurboQuant options; each templated value falls back to the default shown when its variable is unset.
turboquant:
  enabled: true
  kv_type: "{{ turboquant_kv_type | default('turbo4') }}"
  layer_adaptive_mode: {{ turboquant_layer_mode | default(7) }}

# Context sizing.
context:
  max_tokens: {{ max_context_tokens | default(131072) }}
  batch_size: {{ batch_size | default(512) }}

# Sampling parameters.
generation:
  temperature: {{ temperature | default(0.7) }}
  top_p: {{ top_p | default(0.9) }}
  top_k: {{ top_k | default(40) }}

environment:
  # Reads the same variable (and the same fallback of 7) as turboquant.layer_adaptive_mode,
  # but is emitted as a quoted string. Keep the two defaults in sync when changing either.
  TURBO_LAYER_ADAPTIVE: "{{ turboquant_layer_mode | default(7) }}"
Reference in New Issue
Block a user