turboquant/ansible/roles/turboquant-deploy/templates/server_config.yml.j2

# TurboQuant Server Configuration
# Generated by Ansible for {{ inventory_hostname }}

server:
  # 0.0.0.0 is the IPv4 wildcard address.
  # NOTE(review): presumably the server binds every interface - confirm.
  host: "0.0.0.0"
  # No fallback is given for llama_cpp_port, so it must be defined by the
  # inventory or role variables; rendering fails otherwise.
  port: {{ llama_cpp_port }}
  # The path is assembled inside Jinja and emitted through to_json so that a
  # backslash or double quote in either variable is escaped instead of
  # corrupting the double-quoted scalar. ensure_ascii=False keeps non-ASCII
  # characters as-is, so ordinary paths render exactly as before.
  model_path: {{ (model_base_path ~ '/gemma4-turboquant/' ~ (gemma4_model_filename | default('gemma-4-q4_k_m.gguf'))) | to_json(ensure_ascii=False) }}

turboquant:
  enabled: true
  # Rendered as a quoted string; 'turbo4' is used when turboquant_kv_type is
  # not defined.
  kv_type: "{{ turboquant_kv_type | d('turbo4') }}"
  # Rendered unquoted; 7 is used when turboquant_layer_mode is not defined.
  # The same variable also feeds environment.TURBO_LAYER_ADAPTIVE.
  layer_adaptive_mode: {{ turboquant_layer_mode | d(7) }}

context:
  # Both values are rendered unquoted so they stay numeric in the output.
  # Fallbacks apply only when the variable is not defined at all.
  max_tokens: {{ max_context_tokens | d(131072) }}
  batch_size: {{ batch_size | d(512) }}

generation:
  # Sampling settings, rendered unquoted so they stay numeric. Each fallback
  # is used only when the variable is undefined, so an explicit 0 supplied by
  # the caller is passed through unchanged.
  temperature: {{ temperature | d(0.7) }}
  top_p: {{ top_p | d(0.9) }}
  top_k: {{ top_k | d(40) }}

environment:
  # Quoted so the value is emitted as a string. It reads the same variable
  # and fallback (7) as turboquant.layer_adaptive_mode.
  TURBO_LAYER_ADAPTIVE: "{{ turboquant_layer_mode | d(7) }}"