# turboquant/ansible/roles/turboquant-deploy/tasks/main.yml

---
# Main tasks for TurboQuant deployment

# Collect the facts that later tasks in this file read: ansible_os_family
# (OS-specific include), ansible_architecture and ansible_processor_vcpus
# (build flags). A `filter: ansible_distribution*` only returns the
# distribution facts, so the others stay undefined unless the play has
# already gathered them. '!all' keeps the minimal subset (platform and
# distribution facts); 'hardware' adds the processor facts.
- name: Gather OS facts
  setup:
    gather_subset:
      - '!all'
      - hardware

# Pull in the per-family task file (debian.yml, redhat.yml or darwin.yml).
# Hosts from any other OS family skip the include.
- name: Include OS-specific tasks
  when: ansible_os_family in ['Debian', 'RedHat', 'Darwin']
  include_tasks: "{{ ansible_os_family | lower }}.yml"

# Ensure the directory that holds the model file and server config exists.
- name: Create model directory
  file:
    state: directory
    mode: "0755"
    path: "{{ model_base_path }}/gemma4-turboquant"

# Check out the requested revision of the fork. The registered result is
# used by the build task to decide whether a rebuild is needed.
- name: Clone llama.cpp TurboQuant fork
  register: turboquant_clone
  git:
    dest: "{{ turboquant_install_path | default('/opt/llama-cpp-turboquant') }}"
    version: "{{ turboquant_version }}"
    repo: "https://forge.alexanderwhitestone.com/Timmy_Foundation/llama-cpp-turboquant.git"

# Configure and compile the checkout with CMake.
#   - Metal is enabled when the host architecture fact is 'arm64'.
#   - CUDA is enabled when gpu_type is 'nvidia'; an unset gpu_type is
#     treated as "no CUDA" instead of aborting the play.
#   - The job count falls back to 1 when the processor fact is unavailable.
# Runs only when the clone task reported a change or force_rebuild is set.
- name: Build llama.cpp with TurboQuant
  shell: |
    # Abort on the first failing command so a failed configure step is not
    # masked by the exit status of the build step.
    set -e
    cmake -B build \
      -DGGML_METAL={{ 'ON' if ansible_architecture == 'arm64' else 'OFF' }} \
      -DGGML_CUDA={{ 'ON' if (gpu_type | default('')) == 'nvidia' else 'OFF' }} \
      -DCMAKE_BUILD_TYPE=Release
    cmake --build build -j{{ ansible_processor_vcpus | default(1) }}
  args:
    chdir: "{{ turboquant_install_path | default('/opt/llama-cpp-turboquant') }}"
  # `| bool` matters for extra-vars: `-e force_rebuild=false` arrives as the
  # non-empty string "false", which would otherwise evaluate as truthy.
  when: turboquant_clone.changed or (force_rebuild | default(false) | bool)

# Fetch the model file into the model directory. Skipped entirely when no
# download URL has been provided.
- name: Download Gemma 4 model
  when: gemma4_model_url is defined
  get_url:
    dest: "{{ model_base_path }}/gemma4-turboquant/{{ gemma4_model_filename }}"
    url: "{{ gemma4_model_url }}"
    mode: "0644"

# Render the server configuration template next to the model file.
- name: Deploy TurboQuant server config
  template:
    dest: "{{ model_base_path }}/gemma4-turboquant/server_config.yml"
    src: server_config.yml.j2
    mode: "0644"

# Install the systemd unit file and schedule a service restart when it
# changes. Skipped on Darwin hosts: this role accepts the Darwin OS family,
# but macOS has no systemd and no /etc/systemd/system directory. If the
# OS-family fact is unavailable the task runs as before.
- name: Deploy systemd service
  template:
    src: turboquant.service.j2
    dest: /etc/systemd/system/turboquant.service
    mode: '0644'
  notify: restart turboquant
  when: (ansible_os_family | default('')) != 'Darwin'

# Reload systemd's unit definitions, enable the service at boot and make
# sure it is running. Skipped on Darwin hosts, which have no systemd; if
# the OS-family fact is unavailable the task runs as before.
- name: Enable and start TurboQuant service
  systemd:
    name: turboquant
    enabled: true
    state: started
    daemon_reload: true
  when: (ansible_os_family | default('')) != 'Darwin'