diff --git a/ansible/roles/turboquant-deploy/tasks/main.yml b/ansible/roles/turboquant-deploy/tasks/main.yml
new file mode 100644
index 00000000..8ecd3b16
--- /dev/null
+++ b/ansible/roles/turboquant-deploy/tasks/main.yml
@@ -0,0 +1,91 @@
+---
+# Main tasks for TurboQuant deployment.
+#
+# Clones and builds the llama.cpp TurboQuant fork, optionally downloads the
+# Gemma 4 model, renders the server config and, on systemd hosts, installs
+# and starts the turboquant service.
+#
+# Variables read here:
+#   model_base_path          parent directory of gemma4-turboquant/
+#   turboquant_version       git ref to check out from the fork
+#   turboquant_install_path  clone/build dir (default /opt/llama-cpp-turboquant)
+#   gpu_type                 'nvidia' enables the CUDA backend (default: off)
+#   force_rebuild            rebuild even when the checkout did not change
+#   gemma4_model_url         when defined, the model is downloaded from it
+#   gemma4_model_filename    file name the downloaded model is saved under
+
+# Later tasks read ansible_os_family, ansible_architecture,
+# ansible_processor_vcpus and ansible_service_mgr. A filter of
+# 'ansible_distribution*' returns none of them, so collect the minimal
+# subset plus hardware facts instead.
+- name: Gather OS and hardware facts
+  setup:
+    gather_subset:
+      - '!all'
+      - hardware
+
+- name: Include OS-specific tasks
+  include_tasks: "{{ ansible_os_family | lower }}.yml"
+  when: ansible_os_family in ['Debian', 'RedHat', 'Darwin']
+
+- name: Create model directory
+  file:
+    path: "{{ model_base_path }}/gemma4-turboquant"
+    state: directory
+    mode: '0755'
+
+- name: Clone llama.cpp TurboQuant fork
+  git:
+    repo: "https://forge.alexanderwhitestone.com/Timmy_Foundation/llama-cpp-turboquant.git"
+    dest: "{{ turboquant_install_path | default('/opt/llama-cpp-turboquant') }}"
+    version: "{{ turboquant_version }}"
+  register: turboquant_clone
+
+# Metal is requested only on Apple-silicon macOS (arm64 hosts of other OS
+# families have no Metal); CUDA only when gpu_type is 'nvidia'.
+# 'set -e' aborts the script as soon as the configure step fails.
+# '| bool' keeps a string such as -e force_rebuild=false from being truthy.
+- name: Build llama.cpp with TurboQuant
+  shell: |
+    set -e
+    cmake -B build \
+      -DGGML_METAL={{ 'ON' if (ansible_os_family == 'Darwin' and ansible_architecture == 'arm64') else 'OFF' }} \
+      -DGGML_CUDA={{ 'ON' if (gpu_type | default('')) == 'nvidia' else 'OFF' }} \
+      -DCMAKE_BUILD_TYPE=Release
+    cmake --build build -j{{ ansible_processor_vcpus | default(1) }}
+  args:
+    chdir: "{{ turboquant_install_path | default('/opt/llama-cpp-turboquant') }}"
+  when: turboquant_clone.changed or (force_rebuild | default(false) | bool)
+
+# Skipped entirely unless gemma4_model_url is defined.
+- name: Download Gemma 4 model
+  get_url:
+    url: "{{ gemma4_model_url }}"
+    dest: "{{ model_base_path }}/gemma4-turboquant/{{ gemma4_model_filename }}"
+    mode: '0644'
+  when: gemma4_model_url is defined
+
+- name: Deploy TurboQuant server config
+  template:
+    src: server_config.yml.j2
+    dest: "{{ model_base_path }}/gemma4-turboquant/server_config.yml"
+    mode: '0644'
+
+# Darwin is an accepted OS family above but has no systemd, so the unit is
+# installed and started only where systemd is the service manager.
+# NOTE(review): presumably macOS service setup lives in darwin.yml - confirm.
+- name: Deploy systemd service
+  template:
+    src: turboquant.service.j2
+    dest: /etc/systemd/system/turboquant.service
+    mode: '0644'
+  notify: restart turboquant
+  when: ansible_service_mgr == 'systemd'
+
+- name: Enable and start TurboQuant service
+  systemd:
+    name: turboquant
+    enabled: true
+    state: started
+    daemon_reload: true
+  when: ansible_service_mgr == 'systemd'