Compare commits

...

3 Commits

Author SHA1 Message Date
b430026ea3 Merge branch 'main' into fix/1123
Some checks failed
Review Approval Gate / verify-review (pull_request) Failing after 9s
CI / test (pull_request) Failing after 1m7s
CI / validate (pull_request) Failing after 1m6s
2026-04-22 01:13:57 +00:00
4dd4206a79 Merge branch 'main' into fix/1123
Some checks failed
Review Approval Gate / verify-review (pull_request) Failing after 12s
CI / test (pull_request) Failing after 1m16s
CI / validate (pull_request) Failing after 1m28s
2026-04-22 01:06:54 +00:00
Alexander Whitestone
d96d1ce2ea feat: wire llama health into night watch (#1123)
Some checks failed
CI / test (pull_request) Failing after 54s
CI / validate (pull_request) Failing after 54s
Review Approval Gate / verify-review (pull_request) Failing after 6s
2026-04-15 02:21:01 -04:00
3 changed files with 98 additions and 2 deletions

View File

@@ -37,6 +37,8 @@ import shutil
import subprocess
import sys
import time
import urllib.error
import urllib.request
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
@@ -119,8 +121,6 @@ def _check_memory(threshold_pct: int = 90) -> tuple[str, str]:
def _check_gitea_reachability(gitea_url: str = "https://forge.alexanderwhitestone.com") -> tuple[str, str]:
"""Return (status, detail) for Gitea HTTPS reachability."""
import urllib.request
import urllib.error
try:
with urllib.request.urlopen(gitea_url, timeout=10) as resp:
code = resp.status
@@ -131,6 +131,21 @@ def _check_gitea_reachability(gitea_url: str = "https://forge.alexanderwhiteston
return "WARN", f"Gitea unreachable: {exc}"
def _check_llama_server(endpoint: str = "http://127.0.0.1:11435") -> tuple[str, str]:
"""Return (status, detail) for the local llama.cpp server health endpoint."""
health_url = f"{endpoint.rstrip('/')}/health"
try:
req = urllib.request.Request(health_url, headers={"Accept": "application/json"})
with urllib.request.urlopen(req, timeout=10) as resp:
data = json.loads(resp.read().decode())
if data.get("status") == "ok":
model_name = Path(str(data.get("model_path", ""))).name or data.get("model", "unknown-model")
return "OK", f"llama-server healthy at {endpoint} ({model_name})"
return "WARN", f"llama-server unhealthy at {endpoint}: {data}"
except Exception as exc:
return "WARN", f"llama-server unreachable at {endpoint}: {exc}"
def _check_world_readable_secrets() -> tuple[str, str]:
"""Return (status, detail) for world-readable sensitive files."""
sensitive_patterns = ["*.key", "*.pem", "*.secret", ".env", "*.token"]
@@ -172,6 +187,9 @@ def generate_report(date_str: str, checker_mod) -> str:
gitea_status, gitea_detail = _check_gitea_reachability()
rows.append(("Alpha VPS", gitea_status, gitea_detail))
llama_status, llama_detail = _check_llama_server()
rows.append(("Local LLM", llama_status, llama_detail))
sec_status, sec_detail = _check_world_readable_secrets()
rows.append(("Security", sec_status, sec_detail))

View File

@@ -40,6 +40,9 @@ Standardizes local LLM inference across the fleet using llama.cpp.
curl -sf http://localhost:11435/health
curl -s http://localhost:11435/v1/models
Night Watch integration:
- `bin/night_watch.py` probes the local llama.cpp `/health` endpoint and surfaces failures in the nightly report.
## Troubleshooting
- Won't start → smaller model / lower quant

View File

@@ -0,0 +1,75 @@
from __future__ import annotations
import json
import sys
from pathlib import Path
from unittest.mock import patch
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
class _FakeResponse:
def __init__(self, payload: dict):
self.payload = json.dumps(payload).encode()
def read(self):
return self.payload
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
class _FakeHeartbeatReport:
def to_panel_markdown(self):
return "## Heartbeat Panel\n\nAll jobs healthy."
class _FakeChecker:
    """Stub checker module: build_report() yields a canned heartbeat report."""

    @staticmethod
    def build_report() -> "_FakeHeartbeatReport":
        return _FakeHeartbeatReport()
def test_check_llama_server_reports_healthy_model():
    """A status=ok /health payload yields OK plus the model file's basename."""
    import bin.night_watch as nw

    healthy_payload = {
        "status": "ok",
        "model_path": "/opt/models/llama/Qwen2.5-7B-Instruct-Q4_K_M.gguf",
    }
    fake_resp = _FakeResponse(healthy_payload)
    with patch("bin.night_watch.urllib.request.urlopen", return_value=fake_resp):
        status, detail = nw._check_llama_server("http://127.0.0.1:11435")
    assert status == "OK"
    assert "127.0.0.1:11435" in detail
    assert "Qwen2.5-7B-Instruct-Q4_K_M.gguf" in detail
def test_check_llama_server_reports_warning_on_failure():
    """A connection error must degrade to a WARN row, never raise."""
    import bin.night_watch as nw

    refusal = OSError("connection refused")
    with patch("bin.night_watch.urllib.request.urlopen", side_effect=refusal):
        status, detail = nw._check_llama_server("http://127.0.0.1:11435")
    assert status == "WARN"
    assert "connection refused" in detail
def test_generate_report_includes_local_llm_row():
    """The nightly report carries a Local LLM row and the heartbeat panel."""
    import bin.night_watch as nw

    # Stub every probe so the report generator runs without touching the host.
    stubbed_checks = {
        "_check_service": ("OK", "hermes-bezalel is active"),
        "_check_disk": ("OK", "disk usage 23%"),
        "_check_memory": ("OK", "memory usage 30%"),
        "_check_gitea_reachability": ("OK", "Gitea HTTPS is responding (200)"),
        "_check_world_readable_secrets": ("OK", "no sensitive recently-modified world-readable files found"),
        "_check_llama_server": ("OK", "llama-server healthy at http://127.0.0.1:11435 (Qwen2.5-7B-Instruct-Q4_K_M.gguf)"),
    }
    patchers = [
        patch(f"bin.night_watch.{name}", return_value=result)
        for name, result in stubbed_checks.items()
    ]
    for p in patchers:
        p.start()
    try:
        report = nw.generate_report("2026-04-15", _FakeChecker())
    finally:
        for p in patchers:
            p.stop()
    assert "| Local LLM | OK | llama-server healthy at http://127.0.0.1:11435 (Qwen2.5-7B-Instruct-Q4_K_M.gguf) |" in report
    assert "## Heartbeat Panel" in report