Files
turboquant/ansible/roles/turboquant-deploy/templates/server_config.yml.j2

25 lines
736 B
Django/Jinja

# TurboQuant Server Configuration
# Generated by Ansible for {{ inventory_hostname }}
#
# This file is rendered from a Jinja2 template. Two variables have no
# fallback in the template and must be provided by the play:
# llama_cpp_port and model_base_path. Every other templated value falls
# back to the default noted next to it.
{# NOTE(review): section nesting below was reconstructed from a flattened
   copy of this template; confirm it matches the schema the server reads. #}

server:
  host: "0.0.0.0"
  # From llama_cpp_port (no default).
  port: {{ llama_cpp_port }}
  # model_base_path + fixed "gemma4-turboquant" directory + gemma4_model_filename
  # (default: gemma-4-q4_k_m.gguf).
  model_path: "{{ model_base_path }}/gemma4-turboquant/{{ gemma4_model_filename | default('gemma-4-q4_k_m.gguf') }}"

turboquant:
  enabled: true
  # From turboquant_kv_type (default: turbo4); always emitted as a string.
  kv_type: "{{ turboquant_kv_type | default('turbo4') }}"
  # From turboquant_layer_mode (default: 7); emitted unquoted.
  layer_adaptive_mode: {{ turboquant_layer_mode | default(7) }}

context:
  # From max_context_tokens (default: 131072).
  max_tokens: {{ max_context_tokens | default(131072) }}
  # From batch_size (default: 512).
  batch_size: {{ batch_size | default(512) }}

generation:
  # From temperature (default: 0.7), top_p (default: 0.9), top_k (default: 40).
  temperature: {{ temperature | default(0.7) }}
  top_p: {{ top_p | default(0.9) }}
  top_k: {{ top_k | default(40) }}

environment:
  # Same source variable as turboquant.layer_adaptive_mode (turboquant_layer_mode,
  # default: 7), quoted here so the value is emitted as a string.
  TURBO_LAYER_ADAPTIVE: "{{ turboquant_layer_mode | default(7) }}"