{# Source listing metadata: 25 lines, 736 B, Django/Jinja #}
# TurboQuant Server Configuration
# Generated by Ansible for {{ inventory_hostname }}

# Server section: host, port and model file path.
server:
  # Literal value; not templated.
  host: "0.0.0.0"
  # No default() filter: llama_cpp_port must be defined, otherwise rendering
  # fails under Ansible's default undefined-variable handling.
  # Left unquoted so it renders as a YAML number.
  port: {{ llama_cpp_port }}
  # model_base_path has no default; the filename falls back to
  # 'gemma-4-q4_k_m.gguf' when gemma4_model_filename is undefined.
  model_path: "{{ model_base_path }}/gemma4-turboquant/{{ gemma4_model_filename | default('gemma-4-q4_k_m.gguf') }}"

# TurboQuant section: enable flag, KV type and layer-adaptive mode.
turboquant:
  # Hard-coded boolean; there is no variable to turn it off.
  enabled: true
  # Quoted so the rendered value is always a YAML string;
  # falls back to 'turbo4' when turboquant_kv_type is undefined.
  kv_type: "{{ turboquant_kv_type | default('turbo4') }}"
  # Unquoted so it renders as a number; falls back to 7 when
  # turboquant_layer_mode is undefined. The same variable and default also
  # feed environment.TURBO_LAYER_ADAPTIVE in this file.
  layer_adaptive_mode: {{ turboquant_layer_mode | default(7) }}

# Context section: token limit and batch size.
# Both values are unquoted so they render as YAML numbers.
context:
  # Falls back to 131072 when max_context_tokens is undefined.
  max_tokens: {{ max_context_tokens | default(131072) }}
  # Falls back to 512 when batch_size is undefined.
  batch_size: {{ batch_size | default(512) }}

# Generation section: sampling parameters.
# Each key is overridden by the Ansible variable of the same name and is
# left unquoted so it renders as a YAML number.
generation:
  # Falls back to 0.7 when temperature is undefined.
  temperature: {{ temperature | default(0.7) }}
  # Falls back to 0.9 when top_p is undefined.
  top_p: {{ top_p | default(0.9) }}
  # Falls back to 40 when top_k is undefined.
  top_k: {{ top_k | default(40) }}

# Environment section: variable name mapped to its value.
environment:
  # Quoted so the value renders as a YAML string rather than a number.
  # Uses the same variable and default (7) as turboquant.layer_adaptive_mode.
  TURBO_LAYER_ADAPTIVE: "{{ turboquant_layer_mode | default(7) }}"