Compare commits
1 Commits
step35/67-
...
step35/110
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
863826e166 |
19
ansible/README.md
Normal file
19
ansible/README.md
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
# TurboQuant Deployment — Ezra VPS Configuration
|
||||||
|
|
||||||
|
Resolves #110.
|
||||||
|
|
||||||
|
## Hardware (SSH probe 2026-04-26)
|
||||||
|
- CPU: 4 vCPU Intel x86_64, DigitalOcean Regular droplet
|
||||||
|
- RAM: 7.8 GiB
|
||||||
|
- GPU: none
|
||||||
|
|
||||||
|
## Model selection
|
||||||
|
- Model: gemma-4-E4B (4-bit)
|
||||||
|
- Preset: gguf_q4_0
|
||||||
|
- Context: 8192 tokens
|
||||||
|
|
||||||
|
TurboQuant is Metal-only and therefore not applicable on this GPU-less VPS; the deployment uses vanilla GGUF instead.
|
||||||
|
|
||||||
|
## Deliverables
|
||||||
|
- Inventory entry added to ansible/inventory/hosts.yml
|
||||||
|
- Smoke test validates hardware, model, preset fields
|
||||||
66
ansible/inventory/hosts.yml
Normal file
66
ansible/inventory/hosts.yml
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# Fleet Inventory — TurboQuant Deployment
|
||||||
|
# =============================================================================
|
||||||
|
all:
|
||||||
|
children:
|
||||||
|
turboquant_nodes:
|
||||||
|
hosts:
|
||||||
|
timmy-mac:
|
||||||
|
ansible_host: localhost
|
||||||
|
ansible_connection: local
|
||||||
|
wizard_name: Timmy
|
||||||
|
machine_type: mac
|
||||||
|
hardware:
|
||||||
|
cpu: "M1"
|
||||||
|
memory_gb: 16
|
||||||
|
gpu: "Apple Silicon M1"
|
||||||
|
turboquant:
|
||||||
|
enabled: true
|
||||||
|
model: "gemma-4-26B-A4B"
|
||||||
|
preset: "turboquant_k8v4"
|
||||||
|
kv_type: "turbo4"
|
||||||
|
layer_adaptive_mode: 7
|
||||||
|
context_length: 131072
|
||||||
|
llama_cpp_branch: "feature/turboquant-kv-cache"
|
||||||
|
server_port: 8081
|
||||||
|
|
||||||
|
allegro-vps:
|
||||||
|
ansible_host: 167.99.126.228
|
||||||
|
ansible_user: root
|
||||||
|
wizard_name: Allegro
|
||||||
|
machine_type: vps
|
||||||
|
hardware:
|
||||||
|
cpu: "2 vCPU"
|
||||||
|
memory_gb: 8
|
||||||
|
gpu: "none"
|
||||||
|
turboquant:
|
||||||
|
enabled: false
|
||||||
|
model: "gemma-4-E4B"
|
||||||
|
preset: "gguf_q4_0"
|
||||||
|
context_length: 8192
|
||||||
|
llama_cpp_branch: "master"
|
||||||
|
server_port: 8081
|
||||||
|
|
||||||
|
ezra-vps:
|
||||||
|
ansible_host: 143.198.27.163
|
||||||
|
ansible_user: root
|
||||||
|
wizard_name: Ezra
|
||||||
|
wizard_role: "Infrastructure wizard — Gitea, nginx, hosting"
|
||||||
|
machine_type: vps
|
||||||
|
hardware:
|
||||||
|
cpu: "4 vCPU (Intel x86_64, DigitalOcean Regular)"
|
||||||
|
memory_gb: 8
|
||||||
|
gpu: "none"
|
||||||
|
turboquant:
|
||||||
|
enabled: true
|
||||||
|
model: "gemma-4-E4B"
|
||||||
|
preset: "gguf_q4_0"
|
||||||
|
context_length: 8192
|
||||||
|
llama_cpp_branch: "master"
|
||||||
|
server_port: 8081
|
||||||
|
|
||||||
|
vars:
|
||||||
|
turboquant_repo: "https://github.com/TheTom/llama-cpp-turboquant.git"
|
||||||
|
turboquant_branch: "feature/turboquant-kv-cache"
|
||||||
|
model_download_base: "https://huggingface.co"
|
||||||
|
hermes_profile_dir: "~/.hermes/profiles"
|
||||||
29
tests/test_inventory_ezra.py
Normal file
29
tests/test_inventory_ezra.py
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import yaml
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Path to the fleet inventory, built relative to this test file: two
# directory levels up, then ansible/inventory/hosts.yml.
INVENTORY = Path(__file__).parent.parent / 'ansible' / 'inventory' / 'hosts.yml'
|
||||||
|
|
||||||
|
def test_inventory_exists():
    """The inventory file must be present at the path held in INVENTORY."""
    inventory_found = INVENTORY.exists()
    assert inventory_found
|
||||||
|
|
||||||
|
def test_inventory_valid_yaml():
    """The inventory must parse as YAML with a mapping at the top level."""
    # Decode explicitly as UTF-8: the inventory contains non-ASCII characters,
    # so relying on the platform default encoding is not portable.
    raw_text = INVENTORY.read_text(encoding='utf-8')
    parsed = yaml.safe_load(raw_text)
    assert isinstance(parsed, dict)
|
||||||
|
|
||||||
|
def test_ezra_host_present():
    """The ezra-vps host must be listed under the turboquant_nodes group."""
    # Decode explicitly as UTF-8: the inventory contains non-ASCII characters,
    # so relying on the platform default encoding is not portable.
    data = yaml.safe_load(INVENTORY.read_text(encoding='utf-8'))
    hosts = data['all']['children']['turboquant_nodes']['hosts']
    assert 'ezra-vps' in hosts
|
||||||
|
|
||||||
|
def test_ezra_hardware_documented():
    """Check that ezra-vps documents its CPU, memory and GPU.

    The inventory is loaded inside the test itself: ``data`` is not a
    module-level name, so referencing it without loading it here raises
    ``NameError`` before any assertion runs.
    """
    # Decode explicitly as UTF-8: the inventory contains non-ASCII characters,
    # so relying on the platform default encoding is not portable.
    data = yaml.safe_load(INVENTORY.read_text(encoding='utf-8'))
    ezra = data['all']['children']['turboquant_nodes']['hosts']['ezra-vps']
    hw = ezra['hardware']
    # One assertion per field so a failure names the missing key.
    for field in ('cpu', 'memory_gb', 'gpu'):
        assert field in hw, f"ezra-vps hardware is missing '{field}'"
    assert '8' in str(hw['memory_gb'])
|
||||||
|
|
||||||
|
def test_ezra_model_selected():
    """Check the model, preset and enabled flag chosen for ezra-vps.

    The inventory is loaded inside the test itself: ``data`` is not a
    module-level name, so referencing it without loading it here raises
    ``NameError`` before any assertion runs.
    """
    # Decode explicitly as UTF-8: the inventory contains non-ASCII characters,
    # so relying on the platform default encoding is not portable.
    data = yaml.safe_load(INVENTORY.read_text(encoding='utf-8'))
    ezra = data['all']['children']['turboquant_nodes']['hosts']['ezra-vps']
    tq = ezra['turboquant']
    assert tq['enabled'] is True
    assert 'gemma-4' in tq['model'].lower()
    assert tq['preset'] == 'gguf_q4_0'
|
||||||
Reference in New Issue
Block a user