Compare commits
1 Commits
fix/cleanup
...
fix/1123
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d96d1ce2ea |
@@ -37,6 +37,8 @@ import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
@@ -119,8 +121,6 @@ def _check_memory(threshold_pct: int = 90) -> tuple[str, str]:
|
||||
|
||||
def _check_gitea_reachability(gitea_url: str = "https://forge.alexanderwhitestone.com") -> tuple[str, str]:
|
||||
"""Return (status, detail) for Gitea HTTPS reachability."""
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
try:
|
||||
with urllib.request.urlopen(gitea_url, timeout=10) as resp:
|
||||
code = resp.status
|
||||
@@ -131,6 +131,21 @@ def _check_gitea_reachability(gitea_url: str = "https://forge.alexanderwhiteston
|
||||
return "WARN", f"Gitea unreachable: {exc}"
|
||||
|
||||
|
||||
def _check_llama_server(endpoint: str = "http://127.0.0.1:11435") -> tuple[str, str]:
|
||||
"""Return (status, detail) for the local llama.cpp server health endpoint."""
|
||||
health_url = f"{endpoint.rstrip('/')}/health"
|
||||
try:
|
||||
req = urllib.request.Request(health_url, headers={"Accept": "application/json"})
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
data = json.loads(resp.read().decode())
|
||||
if data.get("status") == "ok":
|
||||
model_name = Path(str(data.get("model_path", ""))).name or data.get("model", "unknown-model")
|
||||
return "OK", f"llama-server healthy at {endpoint} ({model_name})"
|
||||
return "WARN", f"llama-server unhealthy at {endpoint}: {data}"
|
||||
except Exception as exc:
|
||||
return "WARN", f"llama-server unreachable at {endpoint}: {exc}"
|
||||
|
||||
|
||||
def _check_world_readable_secrets() -> tuple[str, str]:
|
||||
"""Return (status, detail) for world-readable sensitive files."""
|
||||
sensitive_patterns = ["*.key", "*.pem", "*.secret", ".env", "*.token"]
|
||||
@@ -172,6 +187,9 @@ def generate_report(date_str: str, checker_mod) -> str:
|
||||
gitea_status, gitea_detail = _check_gitea_reachability()
|
||||
rows.append(("Alpha VPS", gitea_status, gitea_detail))
|
||||
|
||||
llama_status, llama_detail = _check_llama_server()
|
||||
rows.append(("Local LLM", llama_status, llama_detail))
|
||||
|
||||
sec_status, sec_detail = _check_world_readable_secrets()
|
||||
rows.append(("Security", sec_status, sec_detail))
|
||||
|
||||
|
||||
@@ -40,6 +40,9 @@ Standardizes local LLM inference across the fleet using llama.cpp.
|
||||
curl -sf http://localhost:11435/health
|
||||
curl -s http://localhost:11435/v1/models
|
||||
|
||||
Night Watch integration:
|
||||
- `bin/night_watch.py` probes the local llama.cpp `/health` endpoint and surfaces failures in the nightly report.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- Server won't start → try a smaller model or a lower quantization
|
||||
|
||||
@@ -168,62 +168,3 @@ else
|
||||
fi
|
||||
|
||||
log "Script complete"
|
||||
|
||||
# ─── Stale Branch Cleanup ─────────────────────────────────
# Clean up branches from closed (unmerged) PRs and merged PRs
log "Checking for stale branches from closed/merged PRs..."

# Get all open PRs first so we never delete a branch that is still in use
OPEN_BRANCHES=$(curl -s -H "$AUTH" "$API/repos/$REPO/pulls?state=open&limit=100" | jq -r '.[] | .head.ref' | sort -u)

# Get all closed PRs (last 100)
CLOSED_PRS=$(curl -s -H "$AUTH" "$API/repos/$REPO/pulls?state=closed&limit=100")

if [ -n "$CLOSED_PRS" ] && [ "$CLOSED_PRS" != "null" ]; then
    STALE_BRANCHES=$(echo "$CLOSED_PRS" | jq -r '.[] | select(.merged == false) | .head.ref' | sort -u)
    MERGED_BRANCHES=$(echo "$CLOSED_PRS" | jq -r '.[] | select(.merged == true) | .head.ref' | sort -u)

    STALE_COUNT=0
    for branch in $STALE_BRANCHES; do
        # Skip main/master/develop
        case "$branch" in main|master|develop|HEAD) continue ;; esac

        # SAFETY CHECK: Skip if branch is still used by an open PR.
        # -F -x: literal whole-line match. The previous regex form
        # ("^$branch$") let metacharacters in a branch name (e.g. "fix.x")
        # false-match a different open branch, defeating this check.
        if echo "$OPEN_BRANCHES" | grep -Fxq -- "$branch"; then
            log "Skipping branch '$branch' - still has an open PR"
            continue
        fi

        if [ "$DRY_RUN" = "true" ]; then
            log "DRY RUN: Would delete stale branch '$branch' (from closed unmerged PR)"
        else
            # NOTE(review): a branch name containing '/' is not URL-encoded
            # here — confirm against the Gitea branch-delete API.
            curl -s -X DELETE -H "$AUTH" "$API/repos/$REPO/branches/$branch" > /dev/null 2>&1 || true
            log "Deleted stale branch: $branch"
        fi
        STALE_COUNT=$((STALE_COUNT + 1))
    done

    MERGED_COUNT=0
    for branch in $MERGED_BRANCHES; do
        case "$branch" in main|master|develop|HEAD) continue ;; esac

        # Same literal whole-line safety check as above.
        if echo "$OPEN_BRANCHES" | grep -Fxq -- "$branch"; then
            log "Skipping branch '$branch' - still has an open PR"
            continue
        fi

        if [ "$DRY_RUN" = "true" ]; then
            log "DRY RUN: Would delete merged branch '$branch'"
        else
            curl -s -X DELETE -H "$AUTH" "$API/repos/$REPO/branches/$branch" > /dev/null 2>&1 || true
            log "Deleted merged branch: $branch"
        fi
        MERGED_COUNT=$((MERGED_COUNT + 1))
    done

    log "Stale branch cleanup:"
    log " Closed (unmerged) branches: $STALE_COUNT"
    log " Merged branches: $MERGED_COUNT"
else
    log "Could not fetch closed PRs for branch cleanup"
fi
|
||||
|
||||
75
tests/test_night_watch_llama.py
Normal file
75
tests/test_night_watch_llama.py
Normal file
@@ -0,0 +1,75 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||
|
||||
|
||||
class _FakeResponse:
|
||||
def __init__(self, payload: dict):
|
||||
self.payload = json.dumps(payload).encode()
|
||||
|
||||
def read(self):
|
||||
return self.payload
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
|
||||
class _FakeHeartbeatReport:
|
||||
def to_panel_markdown(self):
|
||||
return "## Heartbeat Panel\n\nAll jobs healthy."
|
||||
|
||||
|
||||
class _FakeChecker:
    """Stub checker module: always hands back a canned heartbeat report."""

    @staticmethod
    def build_report():
        # Mirrors the real checker's build_report() entry point.
        return _FakeHeartbeatReport()
|
||||
|
||||
|
||||
def test_check_llama_server_reports_healthy_model():
    """Healthy /health payload → OK status naming endpoint and model file."""
    import bin.night_watch as nw

    payload = {"status": "ok", "model_path": "/opt/models/llama/Qwen2.5-7B-Instruct-Q4_K_M.gguf"}
    fake_response = _FakeResponse(payload)

    with patch("bin.night_watch.urllib.request.urlopen", return_value=fake_response):
        status, detail = nw._check_llama_server("http://127.0.0.1:11435")

    assert status == "OK"
    assert "127.0.0.1:11435" in detail
    assert "Qwen2.5-7B-Instruct-Q4_K_M.gguf" in detail
|
||||
|
||||
|
||||
def test_check_llama_server_reports_warning_on_failure():
    """A connection error from urlopen → WARN status carrying the error text."""
    import bin.night_watch as nw

    refused = OSError("connection refused")
    with patch("bin.night_watch.urllib.request.urlopen", side_effect=refused):
        status, detail = nw._check_llama_server("http://127.0.0.1:11435")

    assert status == "WARN"
    assert "connection refused" in detail
|
||||
|
||||
|
||||
def test_generate_report_includes_local_llm_row():
    """With every probe stubbed healthy, the report gets a Local LLM row."""
    import bin.night_watch as nw

    # Nested with-blocks instead of one backslash-continued statement; each
    # probe is pinned to a deterministic (status, detail) pair.
    with patch("bin.night_watch._check_service", return_value=("OK", "hermes-bezalel is active")):
        with patch("bin.night_watch._check_disk", return_value=("OK", "disk usage 23%")):
            with patch("bin.night_watch._check_memory", return_value=("OK", "memory usage 30%")):
                with patch("bin.night_watch._check_gitea_reachability", return_value=("OK", "Gitea HTTPS is responding (200)")):
                    with patch("bin.night_watch._check_world_readable_secrets", return_value=("OK", "no sensitive recently-modified world-readable files found")):
                        with patch("bin.night_watch._check_llama_server", return_value=("OK", "llama-server healthy at http://127.0.0.1:11435 (Qwen2.5-7B-Instruct-Q4_K_M.gguf)")):
                            report = nw.generate_report("2026-04-15", _FakeChecker())

    assert "| Local LLM | OK | llama-server healthy at http://127.0.0.1:11435 (Qwen2.5-7B-Instruct-Q4_K_M.gguf) |" in report
    assert "## Heartbeat Panel" in report
|
||||
Reference in New Issue
Block a user