Files
turboquant/ansible/roles/turboquant-deploy/tasks/main.yml

59 lines
1.8 KiB
YAML

---
# Main tasks for TurboQuant deployment
- name: Gather OS facts
  # Later tasks in this file read ansible_os_family, ansible_architecture and
  # ansible_processor_vcpus. A `filter: ansible_distribution*` pattern matches
  # none of those names, so the facts are selected by subset instead:
  #   - the minimal subset (still collected when only '!all' is given)
  #     carries the distribution/os_family and architecture facts;
  #   - 'hardware' carries the processor counts.
  setup:
    gather_subset:
      - '!all'
      - hardware
- name: Include OS-specific tasks
  # The task file name is the lower-cased OS family followed by ".yml".
  # Hosts whose family is not in the list below skip the include.
  when:
    - ansible_os_family in ['Debian', 'RedHat', 'Darwin']
  include_tasks: "{{ ansible_os_family | lower }}.yml"
- name: Create model directory
  # Directory that the model download and the server config are written into.
  file:
    state: directory
    path: "{{ model_base_path }}/gemma4-turboquant"
    mode: "0755"
- name: Clone llama.cpp TurboQuant fork
  # The result is registered so the build task can test `.changed` and
  # rebuild only when the checkout moved.
  register: turboquant_clone
  git:
    dest: "{{ turboquant_install_path | default('/opt/llama-cpp-turboquant') }}"
    version: "{{ turboquant_version }}"
    repo: "https://forge.alexanderwhitestone.com/Timmy_Foundation/llama-cpp-turboquant.git"
- name: Build llama.cpp with TurboQuant
  # Configures and compiles the checkout into ./build inside the install path.
  #   - GGML_METAL is ON only when ansible_architecture is 'arm64'.
  #   - GGML_CUDA is ON only when gpu_type is 'nvidia'.
  #   - Parallelism follows ansible_processor_vcpus.
  # `set -e` aborts on a failed configure step; without it the script's exit
  # status comes from the build step alone, which can succeed against a stale
  # build directory.
  shell: |
    set -e
    cmake -B build -DGGML_METAL={{ 'ON' if ansible_architecture == 'arm64' else 'OFF' }} -DGGML_CUDA={{ 'ON' if gpu_type == 'nvidia' else 'OFF' }} -DCMAKE_BUILD_TYPE=Release
    cmake --build build -j{{ ansible_processor_vcpus }}
  args:
    chdir: "{{ turboquant_install_path | default('/opt/llama-cpp-turboquant') }}"
  # `| bool` matters when force_rebuild arrives as a string (for example via
  # `-e force_rebuild=false`): a non-empty string is otherwise truthy.
  when: turboquant_clone.changed or (force_rebuild | default(false) | bool)
- name: Download Gemma 4 model
  # Optional step: runs only when gemma4_model_url has been provided.
  when:
    - gemma4_model_url is defined
  get_url:
    dest: "{{ model_base_path }}/gemma4-turboquant/{{ gemma4_model_filename }}"
    url: "{{ gemma4_model_url }}"
    mode: "0644"
- name: Deploy TurboQuant server config
  # Renders server_config.yml.j2 into the model directory.
  # NOTE(review): this task notifies no handler, so a changed config does not
  # by itself restart the service - confirm whether that is intended.
  template:
    dest: "{{ model_base_path }}/gemma4-turboquant/server_config.yml"
    src: server_config.yml.j2
    mode: "0644"
- name: Deploy systemd service
  # Installs the unit file and schedules a restart when its content changes.
  # This file accepts Darwin hosts in its OS-specific include, and those hosts
  # have no systemd, so the systemd steps are skipped there.
  template:
    src: turboquant.service.j2
    dest: /etc/systemd/system/turboquant.service
    mode: "0644"
  notify: restart turboquant
  when: ansible_os_family != 'Darwin'
- name: Enable and start TurboQuant service
  # daemon_reload makes systemd re-read unit files before the service is
  # enabled and started. Skipped on Darwin for the same reason as the unit
  # file deployment.
  systemd:
    name: turboquant
    enabled: true
    state: started
    daemon_reload: true
  when: ansible_os_family != 'Darwin'