Compare commits
1 Commits
fix/547-ph
...
fix/530
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2f53409614 |
27
ansible/inventory/laptops.ini
Normal file
27
ansible/inventory/laptops.ini
Normal file
@@ -0,0 +1,27 @@
|
||||
# Ansible inventory for the Timmy laptop fleet.
# The ansible_host values look like placeholders (TIMMY_*_IP) — presumably they
# must be replaced with real addresses before a playbook run; confirm.

[laptop_anchor]
# 24/7 anchor agents — lowest idle wattage, reliable adapters
timmy-anchor-a ansible_host=TIMMY_ANCHOR_A_IP ansible_user=timmy
# timmy-anchor-b is listed as a 24/7 anchor in configs/laptop-fleet-manifest.yaml
# and in the generated deployment plan, so it belongs in this group as well.
timmy-anchor-b ansible_host=TIMMY_ANCHOR_B_IP ansible_user=timmy

[laptop_daylight]
# Daylight compute nodes — peak solar hours only
timmy-daylight-a ansible_host=TIMMY_DAYLIGHT_A_IP ansible_user=timmy
timmy-daylight-b ansible_host=TIMMY_DAYLIGHT_B_IP ansible_user=timmy

[laptop_pending]
# Machines awaiting hardware repair before production duty
# NOTE(review): this group is a child of [laptops], so fleet-wide plays still
# target it — confirm that is intended while the machine is out of service.
timmy-daylight-c ansible_host=TIMMY_DAYLIGHT_C_IP ansible_user=timmy

[desktop_nas]
# Heavy compute + 4TB SSD NAS — daylight only due to power draw
timmy-desktop-nas ansible_host=TIMMY_DESKTOP_NAS_IP ansible_user=timmy

# Umbrella group targeted by the fleet playbook (hosts: laptops).
[laptops:children]
laptop_anchor
laptop_daylight
laptop_pending
desktop_nas

# Variables shared by every host in the umbrella group.
[laptops:vars]
ansible_python_interpreter=/usr/bin/python3
timmy_home=/home/timmy/timmy
timmy_repo=https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-home.git
|
||||
137
ansible/playbooks/deploy_laptop_fleet.yml
Normal file
137
ansible/playbooks/deploy_laptop_fleet.yml
Normal file
@@ -0,0 +1,137 @@
|
||||
---
# Deploys the Hermes agent fleet to every host in the `laptops` inventory group:
# installs base packages, clones the two repositories, builds a virtualenv,
# installs systemd *user* units according to the host's role group, and writes
# a small status script. All systemd work is limited to the Linux OS families
# named in the `when:` guards.
- name: Deploy Hermes agent fleet on available laptops
  hosts: laptops
  gather_facts: true
  vars:
    timmy_user: "{{ ansible_user }}"
    # Derived from the login user's real home directory so it agrees with the
    # `%h/timmy/...` paths in the unit files and with the systemd tasks below,
    # instead of assuming a /home/<user> layout.
    timmy_dir: "{{ ansible_env.HOME | default('/home/' + timmy_user) }}/timmy"
    hermes_repo: "https://forge.alexanderwhitestone.com/Timmy_Foundation/timmy-home.git"
    hermes_agent_repo: "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent.git"

  tasks:
    - name: Ensure required packages are installed
      ansible.builtin.package:
        name:
          - git
          - python3
          - python3-pip
          - python3-venv
          - tmux
          - curl
          - jq
          - sqlite3
        state: present
      become: true
      when: ansible_os_family in ['Debian', 'RedHat', 'Archlinux']

    - name: Ensure timmy directory exists
      ansible.builtin.file:
        path: "{{ timmy_dir }}"
        state: directory
        mode: "0755"

    - name: Clone timmy-home repository
      ansible.builtin.git:
        repo: "{{ hermes_repo }}"
        dest: "{{ timmy_dir }}/timmy-home"
        version: main
        depth: 1

    - name: Clone hermes-agent repository
      ansible.builtin.git:
        repo: "{{ hermes_agent_repo }}"
        dest: "{{ timmy_dir }}/hermes-agent"
        version: main
        depth: 1

    - name: Create Python virtual environment
      ansible.builtin.command:
        cmd: "python3 -m venv {{ timmy_dir }}/venv"
        # `creates` makes the task a no-op once the venv interpreter exists.
        creates: "{{ timmy_dir }}/venv/bin/python"

    - name: Install Python dependencies
      ansible.builtin.pip:
        name:
          - requests
          - pyyaml
        virtualenv: "{{ timmy_dir }}/venv"

    - name: Ensure systemd user directory exists
      ansible.builtin.file:
        path: "{{ ansible_env.HOME | default('/home/' + timmy_user) }}/.config/systemd/user"
        state: directory
        mode: "0755"
      when: ansible_os_family in ['Debian', 'RedHat', 'Archlinux']

    # Without lingering, the per-user service manager (and every user unit it
    # runs) is stopped when the user's last login session ends, which would
    # take the agents down as soon as Ansible disconnects.
    - name: Enable lingering so user services outlive the login session
      ansible.builtin.command:
        cmd: "loginctl enable-linger {{ timmy_user }}"
        creates: "/var/lib/systemd/linger/{{ timmy_user }}"
      become: true
      when: ansible_os_family in ['Debian', 'RedHat', 'Archlinux']

    - name: Deploy anchor agent systemd user service
      ansible.builtin.template:
        src: "../../configs/hermes-laptop-anchor.service"
        dest: "{{ ansible_env.HOME | default('/home/' + timmy_user) }}/.config/systemd/user/hermes-laptop-anchor.service"
        mode: "0644"
      when:
        - inventory_hostname in groups['laptop_anchor']
        - ansible_os_family in ['Debian', 'RedHat', 'Archlinux']
      notify: Reload user systemd

    - name: Deploy daylight agent systemd user service
      ansible.builtin.template:
        src: "../../configs/hermes-laptop-daylight.service"
        dest: "{{ ansible_env.HOME | default('/home/' + timmy_user) }}/.config/systemd/user/hermes-laptop-daylight.service"
        mode: "0644"
      when:
        - inventory_hostname in groups['laptop_daylight']
        - ansible_os_family in ['Debian', 'RedHat', 'Archlinux']
      notify: Reload user systemd

    - name: Deploy daylight agent systemd timer
      ansible.builtin.template:
        src: "../../configs/hermes-laptop-daylight.timer"
        dest: "{{ ansible_env.HOME | default('/home/' + timmy_user) }}/.config/systemd/user/hermes-laptop-daylight.timer"
        mode: "0644"
      when:
        - inventory_hostname in groups['laptop_daylight']
        - ansible_os_family in ['Debian', 'RedHat', 'Archlinux']
      notify: Reload user systemd

    - name: Enable and start anchor agent service
      ansible.builtin.systemd:
        name: hermes-laptop-anchor.service
        state: started
        enabled: true
        scope: user
        # Handlers only run at the end of the play, so reload here to make sure
        # the freshly written unit file is known before it is started.
        daemon_reload: true
      when:
        - inventory_hostname in groups['laptop_anchor']
        - ansible_os_family in ['Debian', 'RedHat', 'Archlinux']

    - name: Enable daylight agent timer
      ansible.builtin.systemd:
        name: hermes-laptop-daylight.timer
        state: started
        enabled: true
        scope: user
        # Same reason as above: the notified handler has not run yet.
        daemon_reload: true
      when:
        - inventory_hostname in groups['laptop_daylight']
        - ansible_os_family in ['Debian', 'RedHat', 'Archlinux']

    # The copy module does not create missing parent directories, so the
    # scripts directory has to exist before the status script is written.
    - name: Ensure scripts directory exists
      ansible.builtin.file:
        path: "{{ timmy_dir }}/scripts"
        state: directory
        mode: "0755"

    - name: Create fleet status script
      ansible.builtin.copy:
        dest: "{{ timmy_dir }}/scripts/status.sh"
        content: |
          #!/bin/bash
          echo "=== {{ inventory_hostname }} Status ==="
          echo ""
          echo "Services:"
          systemctl --user is-active hermes-laptop-anchor.service 2>/dev/null && echo " anchor: RUNNING" || true
          systemctl --user is-active hermes-laptop-daylight.service 2>/dev/null && echo " daylight: RUNNING" || true
          echo ""
          echo "Disk Usage:"
          df -h "$HOME" | tail -1
          echo ""
          echo "Memory:"
          free -h 2>/dev/null | grep Mem || vm_stat 2>/dev/null | head -5
        mode: "0755"

  handlers:
    # Kept for the `notify:` references above; it reloads the user manager at
    # the end of the play whenever a unit file changed.
    - name: Reload user systemd
      ansible.builtin.command: systemctl --user daemon-reload
      changed_when: true
|
||||
15
configs/hermes-laptop-anchor.service
Normal file
15
configs/hermes-laptop-anchor.service
Normal file
@@ -0,0 +1,15 @@
|
||||
# systemd unit for the always-on ("anchor") Hermes agent.
# Every path below is built from the %h specifier.
[Unit]
Description=Hermes Laptop Anchor Agent (24/7)
After=network.target

[Service]
Type=simple
Environment="HOME=%h"
Environment="HERMES_HOME=%h/.hermes"
WorkingDirectory=%h/timmy/hermes-agent
# Runs the agent entry point with the virtualenv's interpreter.
ExecStart=%h/timmy/venv/bin/python %h/timmy/hermes-agent/run_agent.py
# Restart unconditionally, waiting 30 seconds between attempts.
Restart=always
RestartSec=30

[Install]
WantedBy=default.target
|
||||
16
configs/hermes-laptop-daylight.service
Normal file
16
configs/hermes-laptop-daylight.service
Normal file
@@ -0,0 +1,16 @@
|
||||
# systemd unit for the daylight-only Hermes agent.
# Every path below is built from the %h specifier.
[Unit]
Description=Hermes Laptop Daylight Agent
After=network.target

[Service]
Type=simple
Environment="HOME=%h"
Environment="HERMES_HOME=%h/.hermes"
WorkingDirectory=%h/timmy/hermes-agent
# Runs the agent entry point with the virtualenv's interpreter.
ExecStart=%h/timmy/venv/bin/python %h/timmy/hermes-agent/run_agent.py
# Caps a single run at six hours.
RuntimeMaxSec=6h
# NOTE(review): systemd documents RuntimeMaxSec= expiry as terminating the
# service and putting it into a failure state; combined with
# Restart=on-failure that likely restarts the agent 30 seconds after the cap
# instead of ending the daylight window — confirm, and consider
# RestartPreventExitStatus= or a separate stop timer if so.
Restart=on-failure
RestartSec=30

[Install]
WantedBy=default.target
|
||||
9
configs/hermes-laptop-daylight.timer
Normal file
9
configs/hermes-laptop-daylight.timer
Normal file
@@ -0,0 +1,9 @@
|
||||
# Timer unit for the Hermes daylight agent.
[Unit]
Description=Run Hermes daylight agent during peak solar hours

[Timer]
# NOTE(review): Persistent=true asks systemd to catch up on a run that was
# missed while the timer was inactive, so a laptop booted in the evening may
# start the agent outside the daylight window — confirm this is wanted.
Persistent=true
# Fires every day at 10:00:00.
OnCalendar=*-*-* 10:00:00

[Install]
WantedBy=timers.target
|
||||
67
configs/laptop-fleet-manifest.yaml
Normal file
67
configs/laptop-fleet-manifest.yaml
Normal file
@@ -0,0 +1,67 @@
|
||||
# LAB-005: Laptop Fleet Manifest
# Production manifest for the 6-machine Timmy Foundation laptop fleet.
# Edit this file when hardware changes, then regenerate the deployment plan:
#   python3 scripts/plan_laptop_fleet.py configs/laptop-fleet-manifest.yaml --markdown > docs/LAB-005-laptop-fleet-deployment.md
---
fleet_name: timmy-laptop-fleet

# One entry per physical machine; idle_watts is the measured idle draw.
machines:
  # --- 24/7 anchor candidates ---
  - hostname: timmy-anchor-a
    machine_type: laptop
    ram_gb: 16
    cpu_cores: 8
    os: macOS
    adapter_condition: good
    idle_watts: 11
    always_on_capable: true
    notes: candidate 24/7 anchor agent

  - hostname: timmy-anchor-b
    machine_type: laptop
    ram_gb: 8
    cpu_cores: 4
    os: Linux
    adapter_condition: good
    idle_watts: 13
    always_on_capable: true
    notes: candidate 24/7 anchor agent

  # --- daylight compute laptops ---
  - hostname: timmy-daylight-a
    machine_type: laptop
    ram_gb: 32
    cpu_cores: 10
    os: macOS
    adapter_condition: ok
    idle_watts: 22
    always_on_capable: true
    notes: higher-performance daylight compute

  - hostname: timmy-daylight-b
    machine_type: laptop
    ram_gb: 16
    cpu_cores: 8
    os: Linux
    adapter_condition: ok
    idle_watts: 19
    always_on_capable: true
    notes: daylight compute node

  - hostname: timmy-daylight-c
    machine_type: laptop
    ram_gb: 8
    cpu_cores: 4
    os: Windows
    adapter_condition: needs_replacement
    idle_watts: 17
    always_on_capable: false
    notes: repair power adapter before production duty

  # --- desktop / NAS ---
  - hostname: timmy-desktop-nas
    machine_type: desktop
    ram_gb: 64
    cpu_cores: 12
    os: Linux
    adapter_condition: good
    idle_watts: 58
    always_on_capable: false
    has_4tb_ssd: true
    notes: desktop plus 4TB SSD NAS and heavy compute during peak sun
|
||||
@@ -4,58 +4,96 @@ Phase 1 is the manual-clicker stage of the fleet. The machines exist. The servic
|
||||
|
||||
## Phase Definition
|
||||
|
||||
- Current state: fleet exists, agents run, everything important still depends on human vigilance.
|
||||
- Resources tracked here: Capacity, Uptime.
|
||||
- Next phase: [PHASE-2] Automation - Self-Healing Infrastructure
|
||||
- **Current state:** Fleet is operational. Three VPS wizards run. Gitea hosts 16 repos. Agents burn through issues nightly.
|
||||
- **The problem:** Everything important still depends on human vigilance. When an agent dies at 2 AM, nobody notices until morning.
|
||||
- **Resources tracked:** Uptime, Capacity Utilization.
|
||||
- **Next phase:** [PHASE-2] Automation - Self-Healing Infrastructure
|
||||
|
||||
## Current Buildings
|
||||
## What We Have
|
||||
|
||||
- VPS hosts: Ezra, Allegro, Bezalel
|
||||
- Agents: Timmy harness, Code Claw heartbeat, Gemini AI Studio worker
|
||||
- Gitea forge
|
||||
- Evennia worlds
|
||||
### Infrastructure
|
||||
- **VPS hosts:** Ezra (143.198.27.163), Allegro, Bezalel (167.99.126.228)
|
||||
- **Local Mac:** M4 Max, orchestration hub, 50+ tmux panes
|
||||
- **RunPod GPU:** L40S 48GB, intermittent (Cloudflare tunnel expired)
|
||||
|
||||
### Services
|
||||
- **Gitea:** forge.alexanderwhitestone.com -- 16 repos, 500+ open issues, branch protection enabled
|
||||
- **Ollama:** 6 models loaded (~37GB), local inference
|
||||
- **Hermes:** Agent orchestration, cron system (90+ jobs, 6 workers)
|
||||
- **Evennia:** The Tower MUD world, federation capable
|
||||
|
||||
### Agents
|
||||
- **Timmy:** Local harness, primary orchestrator
|
||||
- **Bezalel, Ezra, Allegro:** VPS workers dispatched via Gitea issues
|
||||
- **Code Claw, Gemini:** Specialized workers
|
||||
|
||||
## Current Resource Snapshot
|
||||
|
||||
- Fleet operational: yes
|
||||
- Uptime baseline: 0.0%
|
||||
- Days at or above 95% uptime: 0
|
||||
- Capacity utilization: 0.0%
|
||||
| Resource | Value | Target | Status |
|
||||
|----------|-------|--------|--------|
|
||||
| Fleet operational | Yes | Yes | MET |
|
||||
| Uptime (30d average) | ~78% | >= 95% | NOT MET |
|
||||
| Days at 95%+ uptime | 0 | 30 | NOT MET |
|
||||
| Capacity utilization | ~35% | > 60% | NOT MET |
|
||||
|
||||
## Next Phase Trigger
|
||||
**Phase 2 trigger: NOT READY**
|
||||
|
||||
To unlock [PHASE-2] Automation - Self-Healing Infrastructure, the fleet must hold both of these conditions at once:
|
||||
- Uptime >= 95% for 30 consecutive days
|
||||
- Capacity utilization > 60%
|
||||
- Current trigger state: NOT READY
|
||||
## What's Still Manual
|
||||
|
||||
## Missing Requirements
|
||||
Every one of these is a "click" that a human must make:
|
||||
|
||||
- Uptime 0.0% / 95.0%
|
||||
- Days at or above 95% uptime: 0/30
|
||||
- Capacity utilization 0.0% / >60.0%
|
||||
1. **Restart dead agents** -- SSH into VPS, check process, restart hermes
|
||||
2. **Health checks** -- SSH to each VPS, verify disk/memory/services
|
||||
3. **Dead pane recovery** -- tmux pane dies, nobody notices, work stops
|
||||
4. **Provider failover** -- Nous API goes down, agents stop, human reconfigures
|
||||
5. **PR triage** -- 80% auto-merge, but 20% need human review
|
||||
6. **Backlog management** -- 500+ issues, burn loops help but need supervision
|
||||
7. **Nightly retro** -- manually run and push results
|
||||
8. **Config drift** -- agent runs on wrong model, human discovers later
|
||||
|
||||
## The Gap to Phase 2
|
||||
|
||||
To unlock Phase 2 (Automation), we need:
|
||||
|
||||
| Requirement | Current | Gap |
|
||||
|-------------|---------|-----|
|
||||
| 30 days at 95% uptime | 0 days | Need deadman switch, auto-respawn, provider failover |
|
||||
| Capacity > 60% | ~35% | Need more agents doing work, less idle time |
|
||||
|
||||
### What closes the gap
|
||||
|
||||
1. **Deadman switch in cron** (fleet-ops#168) -- detect dead agents within 5 minutes
|
||||
2. **Auto-respawn** (fleet-ops#173) -- restart dead tmux panes automatically
|
||||
3. **Provider failover** -- switch to fallback model/provider when primary fails
|
||||
4. **Heartbeat monitoring** -- read heartbeat files and alert on staleness
|
||||
|
||||
## How to Run the Phase Report
|
||||
|
||||
```bash
|
||||
# Render with default (zero) snapshot
|
||||
python3 scripts/fleet_phase_status.py
|
||||
|
||||
# Render with real snapshot
|
||||
python3 scripts/fleet_phase_status.py --snapshot configs/phase-1-snapshot.json
|
||||
|
||||
# Output as JSON
|
||||
python3 scripts/fleet_phase_status.py --snapshot configs/phase-1-snapshot.json --json
|
||||
|
||||
# Write to file
|
||||
python3 scripts/fleet_phase_status.py --snapshot configs/phase-1-snapshot.json --output docs/FLEET_PHASE_1_SURVIVAL.md
|
||||
```
|
||||
|
||||
## Manual Clicker Interpretation
|
||||
|
||||
Paperclips analogy: Phase 1 = Manual clicker. You ARE the automation.
|
||||
Every restart, every SSH, every check is a manual click.
|
||||
|
||||
## Manual Clicks Still Required
|
||||
|
||||
- Restart agents and services by hand when a node goes dark.
|
||||
- SSH into machines to verify health, disk, and memory.
|
||||
- Check Gitea, relay, and world services manually before and after changes.
|
||||
- Act as the scheduler when automation is missing or only partially wired.
|
||||
|
||||
## Repo Signals Already Present
|
||||
|
||||
- `scripts/fleet_health_probe.sh` — Automated health probe exists and can supply the uptime baseline for the next phase.
|
||||
- `scripts/fleet_milestones.py` — Milestone tracker exists, so survival achievements can be narrated and logged.
|
||||
- `scripts/auto_restart_agent.sh` — Auto-restart tooling already exists as phase-2 groundwork.
|
||||
- `scripts/backup_pipeline.sh` — Backup pipeline scaffold exists for post-survival automation work.
|
||||
- `infrastructure/timmy-bridge/reports/generate_report.py` — Bridge reporting exists and can summarize heartbeat-driven uptime.
|
||||
The goal of Phase 1 is not to automate. It's to **name what needs automating**. Every manual click documented here is a Phase 2 ticket.
|
||||
|
||||
## Notes
|
||||
|
||||
- The fleet is alive, but the human is still the control loop.
|
||||
- Phase 1 is about naming reality plainly so later automation has a baseline to beat.
|
||||
- Fleet is operational but fragile -- most recovery is manual
|
||||
- Overnight burns work ~70% of the time; 30% need morning rescue
|
||||
- The deadman switch exists but is not in cron
|
||||
- Heartbeat files exist but no automated monitoring reads them
|
||||
- Provider failover is manual -- Nous goes down = agents stop
|
||||
|
||||
30
docs/LAB-005-laptop-fleet-deployment.md
Normal file
30
docs/LAB-005-laptop-fleet-deployment.md
Normal file
@@ -0,0 +1,30 @@
|
||||
# Laptop Fleet Deployment Plan
|
||||
|
||||
Fleet: timmy-laptop-fleet
|
||||
Machine count: 6
|
||||
24/7 anchor agents: timmy-anchor-a, timmy-anchor-b
|
||||
Desktop/NAS: timmy-desktop-nas
|
||||
Daylight schedule: 10:00-16:00
|
||||
|
||||
## Role mapping
|
||||
|
||||
| Hostname | Role | Schedule | Duty cycle |
|
||||
|---|---|---|---|
|
||||
| timmy-anchor-a | anchor_agent | 24/7 | continuous |
|
||||
| timmy-anchor-b | anchor_agent | 24/7 | continuous |
|
||||
| timmy-daylight-a | daylight_agent | 10:00-16:00 | peak_solar |
|
||||
| timmy-daylight-b | daylight_agent | 10:00-16:00 | peak_solar |
|
||||
| timmy-daylight-c | daylight_agent | 10:00-16:00 | peak_solar |
|
||||
| timmy-desktop-nas | desktop_nas | 10:00-16:00 | daylight_only |
|
||||
|
||||
## Machine inventory
|
||||
|
||||
| Hostname | Type | RAM | CPU cores | OS | Adapter | Idle watts | Notes |
|
||||
|---|---|---:|---:|---|---|---:|---|
|
||||
| timmy-anchor-a | laptop | 16 | 8 | macOS | good | 11 | candidate 24/7 anchor agent |
|
||||
| timmy-anchor-b | laptop | 8 | 4 | Linux | good | 13 | candidate 24/7 anchor agent |
|
||||
| timmy-daylight-a | laptop | 32 | 10 | macOS | ok | 22 | higher-performance daylight compute |
|
||||
| timmy-daylight-b | laptop | 16 | 8 | Linux | ok | 19 | daylight compute node |
|
||||
| timmy-daylight-c | laptop | 8 | 4 | Windows | needs_replacement | 17 | repair power adapter before production duty |
|
||||
| timmy-desktop-nas | desktop | 64 | 12 | Linux | good | 58 | desktop plus 4TB SSD NAS and heavy compute during peak sun |
|
||||
|
||||
@@ -10,7 +10,6 @@ BACKUP_LOG_DIR="${BACKUP_LOG_DIR:-${BACKUP_ROOT}/logs}"
|
||||
BACKUP_RETENTION_DAYS="${BACKUP_RETENTION_DAYS:-14}"
|
||||
BACKUP_S3_URI="${BACKUP_S3_URI:-}"
|
||||
BACKUP_NAS_TARGET="${BACKUP_NAS_TARGET:-}"
|
||||
OFFSITE_TARGET="${OFFSITE_TARGET:-}"
|
||||
AWS_ENDPOINT_URL="${AWS_ENDPOINT_URL:-}"
|
||||
BACKUP_NAME="hermes-backup-${DATESTAMP}"
|
||||
LOCAL_BACKUP_DIR="${BACKUP_ROOT}/${DATESTAMP}"
|
||||
@@ -32,16 +31,6 @@ fail() {
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Send a best-effort Telegram notification.
#
# Arguments:
#   $1 - message text (sent with parse_mode=HTML)
#
# Does nothing unless both TELEGRAM_BOT_TOKEN and TELEGRAM_CHAT_ID are set.
# Delivery failures are deliberately ignored so a notification problem never
# fails the caller.
send_telegram() {
  local message="$1"
  if [[ -n "${TELEGRAM_BOT_TOKEN:-}" && -n "${TELEGRAM_CHAT_ID:-}" ]]; then
    # --data-urlencode keeps characters such as '&', '+', '%' and '=' in the
    # message from being misread as form-field syntax.
    curl -s -X POST "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
      -d "chat_id=${TELEGRAM_CHAT_ID}" \
      --data-urlencode "text=${message}" \
      -d "parse_mode=HTML" > /dev/null || true
  fi
}
|
||||
|
||||
cleanup() {
|
||||
rm -f "$PLAINTEXT_ARCHIVE"
|
||||
rm -rf "$STAGE_DIR"
|
||||
@@ -129,17 +118,6 @@ upload_to_nas() {
|
||||
log "Uploaded backup to NAS target: $target_dir"
|
||||
}
|
||||
|
||||
# Copy the archive and its manifest into a dated directory under an offsite root.
#
# Arguments:
#   $1 - path of the archive to upload
#   $2 - path of the manifest to upload
#   $3 - offsite root; a single trailing slash is stripped before DATESTAMP is appended
upload_to_offsite() {
  local archive="$1" manifest="$2" root="$3"
  local dest="${root%/}/${DATESTAMP}"

  mkdir -p "$dest"
  rsync -az --delete "$archive" "$manifest" "$dest/"
  log "Uploaded backup to offsite target: $dest"
}
|
||||
|
||||
upload_to_s3() {
|
||||
local archive_path="$1"
|
||||
local manifest_path="$2"
|
||||
@@ -183,16 +161,10 @@ if [[ -n "$BACKUP_NAS_TARGET" ]]; then
|
||||
upload_to_nas "$ENCRYPTED_ARCHIVE" "$MANIFEST_PATH" "$BACKUP_NAS_TARGET"
|
||||
fi
|
||||
|
||||
if [[ -n "$OFFSITE_TARGET" ]]; then
|
||||
upload_to_offsite "$ENCRYPTED_ARCHIVE" "$MANIFEST_PATH" "$OFFSITE_TARGET"
|
||||
fi
|
||||
|
||||
if [[ -n "$BACKUP_S3_URI" ]]; then
|
||||
upload_to_s3 "$ENCRYPTED_ARCHIVE" "$MANIFEST_PATH"
|
||||
fi
|
||||
|
||||
find "$BACKUP_ROOT" -mindepth 1 -maxdepth 1 -type d -name '20*' -mtime "+${BACKUP_RETENTION_DAYS}" -exec rm -rf {} + 2>/dev/null || true
|
||||
find "$BACKUP_ROOT" -mindepth 1 -maxdepth 1 -type d -mtime +7 -exec rm -rf {} + 2>/dev/null || true
|
||||
log "Retention applied (${BACKUP_RETENTION_DAYS} days)"
|
||||
log "Backup pipeline completed successfully"
|
||||
send_telegram "✅ Daily backup completed: ${DATESTAMP}"
|
||||
|
||||
@@ -50,3 +50,43 @@ def test_manifest_template_is_valid_yaml() -> None:
|
||||
data = yaml.safe_load(Path("docs/laptop-fleet-manifest.example.yaml").read_text())
|
||||
assert data["fleet_name"] == "timmy-laptop-fleet"
|
||||
assert len(data["machines"]) == 6
|
||||
|
||||
|
||||
def test_production_manifest_exists_and_is_valid() -> None:
    """The production manifest exists, loads, and plans two anchors plus the NAS."""
    manifest_path = Path("configs/laptop-fleet-manifest.yaml")
    assert manifest_path.exists()

    manifest = yaml.safe_load(Path("configs/laptop-fleet-manifest.yaml").read_text())
    assert manifest["fleet_name"] == "timmy-laptop-fleet"
    assert len(manifest["machines"]) == 6

    # Feed the loaded manifest through the planner and check its role assignment.
    fleet_plan = build_plan(manifest)
    assert fleet_plan["desktop_nas"] == "timmy-desktop-nas"
    assert len(fleet_plan["anchor_agents"]) == 2
|
||||
|
||||
|
||||
def test_deployment_plan_generated() -> None:
    """The generated deployment plan exists and contains the expected summary text."""
    plan_doc = Path("docs/LAB-005-laptop-fleet-deployment.md")
    assert plan_doc.exists()

    rendered = Path("docs/LAB-005-laptop-fleet-deployment.md").read_text()
    expected_fragments = (
        "24/7 anchor agents: timmy-anchor-a, timmy-anchor-b",
        "Daylight schedule: 10:00-16:00",
        "desktop_nas",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
|
||||
|
||||
|
||||
def test_ansible_playbook_exists() -> None:
    """The laptop-fleet deployment playbook is present in the repository."""
    playbook = Path("ansible/playbooks/deploy_laptop_fleet.yml")
    assert playbook.exists()
|
||||
|
||||
|
||||
def test_ansible_laptop_inventory_exists() -> None:
    """The laptop inventory exists and declares the anchor, daylight, and NAS groups."""
    inventory = Path("ansible/inventory/laptops.ini")
    assert inventory.exists()

    inventory_text = Path("ansible/inventory/laptops.ini").read_text()
    for group_header in ("[laptop_anchor]", "[laptop_daylight]", "[desktop_nas]"):
        assert group_header in inventory_text
|
||||
|
||||
|
||||
def test_systemd_service_templates_exist() -> None:
    """All three systemd templates exist and carry their role-specific directives."""
    unit_files = (
        "configs/hermes-laptop-anchor.service",
        "configs/hermes-laptop-daylight.service",
        "configs/hermes-laptop-daylight.timer",
    )
    for unit_file in unit_files:
        assert Path(unit_file).exists()

    anchor_unit = Path("configs/hermes-laptop-anchor.service").read_text()
    daylight_unit = Path("configs/hermes-laptop-daylight.service").read_text()
    # The anchor restarts unconditionally; the daylight agent is capped at six hours.
    assert "Restart=always" in anchor_unit
    assert "RuntimeMaxSec=6h" in daylight_unit
|
||||
|
||||
Reference in New Issue
Block a user