59 lines
1.8 KiB
YAML
59 lines
1.8 KiB
YAML
---
|
|
# Main tasks for TurboQuant deployment
|
|
|
|
# Collect every fact this task file reads later on. The distribution facts
# alone are not enough: the OS-specific include reads ansible_os_family, and
# the build step reads ansible_architecture and ansible_processor_vcpus,
# none of which match the "ansible_distribution*" pattern.
# NOTE: a list value for `filter` requires ansible-core 2.11 or newer.
- name: Gather OS facts
  setup:
    filter:
      - 'ansible_distribution*'
      - 'ansible_os_family'
      - 'ansible_architecture'
      - 'ansible_processor_vcpus'
|
|
|
|
# Load the task file named after the lower-cased OS family (for example
# "debian.yml"). Hosts whose family is not in the list skip the include.
- name: Include OS-specific tasks
  when: ansible_os_family in ['Debian', 'RedHat', 'Darwin']
  include_tasks: "{{ ansible_os_family | lower }}.yml"
|
|
|
|
# Ensure the directory that later receives the model file and the server
# config exists, with mode 0755.
- name: Create model directory
  file:
    state: directory
    path: "{{ model_base_path }}/gemma4-turboquant"
    mode: "0755"
|
|
|
|
# Check out the TurboQuant fork of llama.cpp at turboquant_version. The
# install path falls back to /opt/llama-cpp-turboquant when
# turboquant_install_path is not set. The result is registered so the build
# task can tell whether the checkout changed.
- name: Clone llama.cpp TurboQuant fork
  git:
    repo: "https://forge.alexanderwhitestone.com/Timmy_Foundation/llama-cpp-turboquant.git"
    version: "{{ turboquant_version }}"
    dest: "{{ turboquant_install_path | default('/opt/llama-cpp-turboquant') }}"
  register: turboquant_clone
|
|
|
|
# Configure and compile the checkout with CMake.
#   - Metal is switched on when the architecture fact is 'arm64'; CUDA is
#     switched on when gpu_type is 'nvidia'.
#   - `set -e` aborts the script as soon as the configure step fails, so the
#     build step can never run against a stale or half-written build tree and
#     report success.
#   - The task only runs when the clone changed or force_rebuild is set.
#     `| bool` makes string values such as "false" (e.g. passed with `-e`)
#     evaluate as real booleans instead of as non-empty, truthy strings.
- name: Build llama.cpp with TurboQuant
  shell: |
    set -e
    cmake -B build -DGGML_METAL={{ 'ON' if ansible_architecture == 'arm64' else 'OFF' }} -DGGML_CUDA={{ 'ON' if gpu_type == 'nvidia' else 'OFF' }} -DCMAKE_BUILD_TYPE=Release
    cmake --build build -j{{ ansible_processor_vcpus }}
  args:
    chdir: "{{ turboquant_install_path | default('/opt/llama-cpp-turboquant') }}"
  when: turboquant_clone.changed or (force_rebuild | default(false) | bool)
|
|
|
|
# Fetch the model file into the model directory. The task is skipped when
# gemma4_model_url is not defined; gemma4_model_filename supplies the name
# of the file on disk.
- name: Download Gemma 4 model
  when: gemma4_model_url is defined
  get_url:
    url: "{{ gemma4_model_url }}"
    dest: "{{ model_base_path }}/gemma4-turboquant/{{ gemma4_model_filename }}"
    mode: "0644"
|
|
|
|
# Render server_config.yml.j2 into the model directory.
# NOTE(review): unlike the systemd unit task, this task notifies no handler,
# so a changed config presumably does not restart the service - confirm
# whether that is intended.
- name: Deploy TurboQuant server config
  template:
    dest: "{{ model_base_path }}/gemma4-turboquant/server_config.yml"
    src: server_config.yml.j2
    mode: "0644"
|
|
|
|
# Render the systemd unit file and ask the "restart turboquant" handler to
# run when the unit changes.
# Darwin is accepted by the OS-specific include in this file, but macOS has
# no systemd and no /etc/systemd/system directory, so the task is skipped
# there instead of failing the play.
- name: Deploy systemd service
  template:
    src: turboquant.service.j2
    dest: /etc/systemd/system/turboquant.service
    mode: "0644"
  notify: restart turboquant
  when: ansible_os_family != 'Darwin'
|
|
|
|
# Reload systemd's unit definitions, enable the turboquant unit at boot and
# make sure it is running.
# Booleans are written as canonical `true` rather than `yes`.
# Darwin is accepted by the OS-specific include in this file, but macOS has
# no systemd, so the task is skipped there instead of failing the play.
- name: Enable and start TurboQuant service
  systemd:
    name: turboquant
    enabled: true
    state: started
    daemon_reload: true
  when: ansible_os_family != 'Darwin'
|