fix: restore self-healing runtime checks
Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 9s
PR Checklist / pr-checklist (pull_request) Failing after 1m17s
Smoke Test / smoke (pull_request) Failing after 7s
Validate Config / YAML Lint (pull_request) Failing after 6s
Validate Config / JSON Validate (pull_request) Successful in 6s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 8s
Validate Config / Shell Script Lint (pull_request) Successful in 15s
Validate Config / Cron Syntax Check (pull_request) Successful in 5s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 5s
Validate Config / Playbook Schema Validation (pull_request) Successful in 8s
Architecture Lint / Lint Repository (pull_request) Failing after 8s

This commit is contained in:
Alexander Whitestone
2026-04-12 10:53:55 -04:00
parent f9388f6875
commit b49a0abf39
2 changed files with 61 additions and 0 deletions

View File

@@ -48,6 +48,34 @@ class SelfHealer:
self.log(f" [ERROR] Failed to run remote command on {host}: {e}")
return None
def confirm(self, prompt: str) -> bool:
"""Ask for confirmation unless --yes flag is set."""
if self.yes:
return True
while True:
response = input(f"{prompt} [y/N] ").strip().lower()
if response in ("y", "yes"):
return True
if response in ("n", "no", ""):
return False
print("Please answer 'y' or 'n'.")
def check_llama_server(self, host: str):
ip = FLEET[host]["ip"]
port = FLEET[host]["port"]
try:
requests.get(f"http://{ip}:{port}/health", timeout=2)
except requests.RequestException:
self.log(f" [!] llama-server down on {host}.")
if self.dry_run:
self.log(f" [DRY-RUN] Would restart llama-server on {host}")
else:
if self.confirm(f" Restart llama-server on {host}?"):
self.log(f" Restarting llama-server on {host}...")
self.run_remote(host, "systemctl restart llama-server")
else:
self.log(f" Skipped restart on {host}.")
def check_disk_space(self, host: str):
res = self.run_remote(host, "df -h / | tail -1 | awk '{print $5}' | sed 's/%//'")
if res and res.returncode == 0:

View File

@@ -3,6 +3,7 @@
from __future__ import annotations
import importlib.util
import subprocess
from pathlib import Path
from unittest.mock import MagicMock
@@ -60,3 +61,35 @@ class TestMainCli:
healer_cls.assert_called_once_with(dry_run=False, confirm_kill=True, yes=True)
healer.run.assert_called_once_with()
def test_real_default_dry_run_path_completes(self, monkeypatch, capsys):
class FakeExecutor:
def __init__(self):
self.calls = []
def run_script(self, host, command, *, local=False, timeout=None):
self.calls.append((host, command, local, timeout))
if command.startswith("df -h /"):
return subprocess.CompletedProcess(command, 0, stdout="42\n", stderr="")
if command.startswith("free -m"):
return subprocess.CompletedProcess(command, 0, stdout="12.5\n", stderr="")
if command.startswith("ps aux"):
return subprocess.CompletedProcess(command, 0, stdout="", stderr="")
raise AssertionError(f"unexpected command: {command}")
fake_executor = FakeExecutor()
monkeypatch.setattr(sh, "FLEET", {"mac": {"ip": "127.0.0.1", "port": 8080}})
monkeypatch.setattr(sh.requests, "get", lambda url, timeout: object())
monkeypatch.setattr(sh, "VerifiedSSHExecutor", lambda: fake_executor)
sh.main([])
out = capsys.readouterr().out
assert "Starting self-healing cycle (DRY-RUN mode)." in out
assert "Auditing mac..." in out
assert "Cycle complete." in out
assert fake_executor.calls == [
("127.0.0.1", "df -h / | tail -1 | awk '{print $5}' | sed 's/%//'", True, 15),
("127.0.0.1", "free -m | awk '/^Mem:/{print $3/$2 * 100}'", True, 15),
("127.0.0.1", "ps aux --sort=-%cpu | awk 'NR>1 && $3>80 {print $2, $11, $3}'", True, 15),
]