diff --git a/ansible/README.md b/ansible/README.md new file mode 100644 index 00000000..9e7bb0c3 --- /dev/null +++ b/ansible/README.md @@ -0,0 +1,19 @@ +# TurboQuant Deployment — Ezra VPS Configuration + +Resolves #110. + +## Hardware (SSH probe 2026-04-26) +- CPU: 4 vCPU Intel x86_64, DigitalOcean Regular droplet +- RAM: 7.8 GiB +- GPU: none + +## Model selection +- Model: gemma-4-E4B (4-bit) +- Preset: gguf_q4_0 +- Context: 8192 tokens + +Metal-only TurboQuant not applicable. Uses vanilla GGUF. + +## Deliverables +- Inventory entry added to ansible/inventory/hosts.yml +- Smoke test validates hardware, model, preset fields diff --git a/ansible/inventory/hosts.yml b/ansible/inventory/hosts.yml new file mode 100644 index 00000000..f984cb50 --- /dev/null +++ b/ansible/inventory/hosts.yml @@ -0,0 +1,66 @@ +# ============================================================================= +# Fleet Inventory — TurboQuant Deployment +# ============================================================================= +all: + children: + turboquant_nodes: + hosts: + timmy-mac: + ansible_host: localhost + ansible_connection: local + wizard_name: Timmy + machine_type: mac + hardware: + cpu: "M1" + memory_gb: 16 + gpu: "Apple Silicon M1" + turboquant: + enabled: true + model: "gemma-4-26B-A4B" + preset: "turboquant_k8v4" + kv_type: "turbo4" + layer_adaptive_mode: 7 + context_length: 131072 + llama_cpp_branch: "feature/turboquant-kv-cache" + server_port: 8081 + + allegro-vps: + ansible_host: 167.99.126.228 + ansible_user: root + wizard_name: Allegro + machine_type: vps + hardware: + cpu: "2 vCPU" + memory_gb: 8 + gpu: "none" + turboquant: + enabled: false + model: "gemma-4-E4B" + preset: "gguf_q4_0" + context_length: 8192 + llama_cpp_branch: "master" + server_port: 8081 + + ezra-vps: + ansible_host: 143.198.27.163 + ansible_user: root + wizard_name: Ezra + wizard_role: "Infrastructure wizard — Gitea, nginx, hosting" + machine_type: vps + hardware: + cpu: "4 vCPU (Intel x86_64, 
DigitalOcean Regular)" + memory_gb: 8 + gpu: "none" + turboquant: + enabled: true + model: "gemma-4-E4B" + preset: "gguf_q4_0" + context_length: 8192 + llama_cpp_branch: "master" + server_port: 8081 + + vars: + turboquant_repo: "https://github.com/TheTom/llama-cpp-turboquant.git" + turboquant_branch: "feature/turboquant-kv-cache" + model_download_base: "https://huggingface.co" + hermes_profile_dir: "~/.hermes/profiles"
diff --git a/tests/test_inventory_ezra.py b/tests/test_inventory_ezra.py
new file mode 100644
index 00000000..7090af1a
--- /dev/null
+++ b/tests/test_inventory_ezra.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+"""Smoke tests for the ezra-vps entry in the TurboQuant Ansible inventory."""
+import yaml
+from pathlib import Path
+
+INVENTORY = Path(__file__).parent.parent / 'ansible' / 'inventory' / 'hosts.yml'
+
+def _load_inventory():
+    """Parse the inventory file and return the loaded YAML document."""
+    # The inventory contains non-ASCII text (em dashes); decode it explicitly
+    # rather than relying on the platform's default encoding.
+    return yaml.safe_load(INVENTORY.read_text(encoding='utf-8'))
+
+def _ezra_host():
+    """Return the ezra-vps host mapping from the turboquant_nodes group."""
+    hosts = _load_inventory()['all']['children']['turboquant_nodes']['hosts']
+    return hosts['ezra-vps']
+
+def test_inventory_exists():
+    """The inventory file is present at the expected path."""
+    assert INVENTORY.exists()
+
+def test_inventory_valid_yaml():
+    """The inventory parses as YAML with a mapping at the top level."""
+    assert isinstance(_load_inventory(), dict)
+
+def test_ezra_host_present():
+    """ezra-vps is listed under the turboquant_nodes group."""
+    hosts = _load_inventory()['all']['children']['turboquant_nodes']['hosts']
+    assert 'ezra-vps' in hosts
+
+def test_ezra_hardware_documented():
+    """ezra-vps records cpu, memory_gb and gpu, and memory_gb is 8."""
+    hw = _ezra_host()['hardware']
+    assert 'cpu' in hw and 'memory_gb' in hw and 'gpu' in hw
+    # Compare the whole value; a substring check would also accept 18 or 80.
+    assert str(hw['memory_gb']) == '8'
+
+def test_ezra_model_selected():
+    """ezra-vps is enabled with a gemma-4 model and the gguf_q4_0 preset."""
+    tq = _ezra_host()['turboquant']
+    assert tq['enabled'] is True
+    assert 'gemma-4' in tq['model'].lower()
+    assert tq['preset'] == 'gguf_q4_0'