Compare commits

...

3 Commits

Author SHA1 Message Date
b430026ea3 Merge branch 'main' into fix/1123
Some checks failed
Review Approval Gate / verify-review (pull_request) Failing after 9s
CI / test (pull_request) Failing after 1m7s
CI / validate (pull_request) Failing after 1m6s
2026-04-22 01:13:57 +00:00
4dd4206a79 Merge branch 'main' into fix/1123
Some checks failed
Review Approval Gate / verify-review (pull_request) Failing after 12s
CI / test (pull_request) Failing after 1m16s
CI / validate (pull_request) Failing after 1m28s
2026-04-22 01:06:54 +00:00
Alexander Whitestone
d96d1ce2ea feat: wire llama health into night watch (#1123)
Some checks failed
CI / test (pull_request) Failing after 54s
CI / validate (pull_request) Failing after 54s
Review Approval Gate / verify-review (pull_request) Failing after 6s
2026-04-15 02:21:01 -04:00
3 changed files with 98 additions and 2 deletions

View File

@@ -37,6 +37,8 @@ import shutil
import subprocess
import sys
import time
import urllib.error
import urllib.request
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
@@ -119,8 +121,6 @@ def _check_memory(threshold_pct: int = 90) -> tuple[str, str]:
def _check_gitea_reachability(gitea_url: str = "https://forge.alexanderwhitestone.com") -> tuple[str, str]:
"""Return (status, detail) for Gitea HTTPS reachability."""
import urllib.request
import urllib.error
try:
with urllib.request.urlopen(gitea_url, timeout=10) as resp:
code = resp.status
@@ -131,6 +131,21 @@ def _check_gitea_reachability(gitea_url: str = "https://forge.alexanderwhiteston
return "WARN", f"Gitea unreachable: {exc}"
def _check_llama_server(endpoint: str = "http://127.0.0.1:11435") -> tuple[str, str]:
"""Return (status, detail) for the local llama.cpp server health endpoint."""
health_url = f"{endpoint.rstrip('/')}/health"
try:
req = urllib.request.Request(health_url, headers={"Accept": "application/json"})
with urllib.request.urlopen(req, timeout=10) as resp:
data = json.loads(resp.read().decode())
if data.get("status") == "ok":
model_name = Path(str(data.get("model_path", ""))).name or data.get("model", "unknown-model")
return "OK", f"llama-server healthy at {endpoint} ({model_name})"
return "WARN", f"llama-server unhealthy at {endpoint}: {data}"
except Exception as exc:
return "WARN", f"llama-server unreachable at {endpoint}: {exc}"
def _check_world_readable_secrets() -> tuple[str, str]:
"""Return (status, detail) for world-readable sensitive files."""
sensitive_patterns = ["*.key", "*.pem", "*.secret", ".env", "*.token"]
@@ -172,6 +187,9 @@ def generate_report(date_str: str, checker_mod) -> str:
gitea_status, gitea_detail = _check_gitea_reachability()
rows.append(("Alpha VPS", gitea_status, gitea_detail))
llama_status, llama_detail = _check_llama_server()
rows.append(("Local LLM", llama_status, llama_detail))
sec_status, sec_detail = _check_world_readable_secrets()
rows.append(("Security", sec_status, sec_detail))

View File

@@ -40,6 +40,9 @@ Standardizes local LLM inference across the fleet using llama.cpp.
curl -sf http://localhost:11435/health
curl -s http://localhost:11435/v1/models
Night Watch integration:
- `bin/night_watch.py` probes the local llama.cpp `/health` endpoint and surfaces failures in the nightly report.
## Troubleshooting
- Won't start → smaller model / lower quant

View File

@@ -0,0 +1,75 @@
from __future__ import annotations
import json
import sys
from pathlib import Path
from unittest.mock import patch
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
class _FakeResponse:
def __init__(self, payload: dict):
self.payload = json.dumps(payload).encode()
def read(self):
return self.payload
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
class _FakeHeartbeatReport:
def to_panel_markdown(self):
return "## Heartbeat Panel\n\nAll jobs healthy."
class _FakeChecker:
    """Stub checker module: build_report() yields a canned heartbeat report."""

    @staticmethod
    def build_report() -> "_FakeHeartbeatReport":
        return _FakeHeartbeatReport()
def test_check_llama_server_reports_healthy_model():
    """A status=ok /health payload yields OK plus the model file's basename."""
    import bin.night_watch as nw

    healthy_payload = {
        "status": "ok",
        "model_path": "/opt/models/llama/Qwen2.5-7B-Instruct-Q4_K_M.gguf",
    }
    fake_resp = _FakeResponse(healthy_payload)
    with patch("bin.night_watch.urllib.request.urlopen", return_value=fake_resp):
        status, detail = nw._check_llama_server("http://127.0.0.1:11435")
    assert status == "OK"
    assert "127.0.0.1:11435" in detail
    assert "Qwen2.5-7B-Instruct-Q4_K_M.gguf" in detail
def test_check_llama_server_reports_warning_on_failure():
    """A connection error must degrade to a WARN row, never raise."""
    import bin.night_watch as nw

    refusal = OSError("connection refused")
    with patch("bin.night_watch.urllib.request.urlopen", side_effect=refusal):
        status, detail = nw._check_llama_server("http://127.0.0.1:11435")
    assert status == "WARN"
    assert "connection refused" in detail
def test_generate_report_includes_local_llm_row():
    """The nightly report carries a Local LLM row and the heartbeat panel."""
    import bin.night_watch as nw

    # Stub every probe so the report generator runs without touching the host.
    stubbed_checks = {
        "_check_service": ("OK", "hermes-bezalel is active"),
        "_check_disk": ("OK", "disk usage 23%"),
        "_check_memory": ("OK", "memory usage 30%"),
        "_check_gitea_reachability": ("OK", "Gitea HTTPS is responding (200)"),
        "_check_world_readable_secrets": ("OK", "no sensitive recently-modified world-readable files found"),
        "_check_llama_server": ("OK", "llama-server healthy at http://127.0.0.1:11435 (Qwen2.5-7B-Instruct-Q4_K_M.gguf)"),
    }
    patchers = [
        patch(f"bin.night_watch.{name}", return_value=result)
        for name, result in stubbed_checks.items()
    ]
    for p in patchers:
        p.start()
    try:
        report = nw.generate_report("2026-04-15", _FakeChecker())
    finally:
        for p in patchers:
            p.stop()
    assert "| Local LLM | OK | llama-server healthy at http://127.0.0.1:11435 (Qwen2.5-7B-Instruct-Q4_K_M.gguf) |" in report
    assert "## Heartbeat Panel" in report