Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
d96d1ce2ea feat: wire llama health into night watch (#1123)
Some checks failed
CI / test (pull_request) Failing after 1m34s
Review Approval Gate / verify-review (pull_request) Successful in 12s
CI / validate (pull_request) Failing after 1m6s
2026-04-15 02:21:01 -04:00
7 changed files with 98 additions and 412 deletions

View File

@@ -37,6 +37,8 @@ import shutil
import subprocess
import sys
import time
import urllib.error
import urllib.request
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
@@ -119,8 +121,6 @@ def _check_memory(threshold_pct: int = 90) -> tuple[str, str]:
def _check_gitea_reachability(gitea_url: str = "https://forge.alexanderwhitestone.com") -> tuple[str, str]:
"""Return (status, detail) for Gitea HTTPS reachability."""
import urllib.request
import urllib.error
try:
with urllib.request.urlopen(gitea_url, timeout=10) as resp:
code = resp.status
@@ -131,6 +131,21 @@ def _check_gitea_reachability(gitea_url: str = "https://forge.alexanderwhiteston
return "WARN", f"Gitea unreachable: {exc}"
def _check_llama_server(endpoint: str = "http://127.0.0.1:11435") -> tuple[str, str]:
"""Return (status, detail) for the local llama.cpp server health endpoint."""
health_url = f"{endpoint.rstrip('/')}/health"
try:
req = urllib.request.Request(health_url, headers={"Accept": "application/json"})
with urllib.request.urlopen(req, timeout=10) as resp:
data = json.loads(resp.read().decode())
if data.get("status") == "ok":
model_name = Path(str(data.get("model_path", ""))).name or data.get("model", "unknown-model")
return "OK", f"llama-server healthy at {endpoint} ({model_name})"
return "WARN", f"llama-server unhealthy at {endpoint}: {data}"
except Exception as exc:
return "WARN", f"llama-server unreachable at {endpoint}: {exc}"
def _check_world_readable_secrets() -> tuple[str, str]:
"""Return (status, detail) for world-readable sensitive files."""
sensitive_patterns = ["*.key", "*.pem", "*.secret", ".env", "*.token"]
@@ -172,6 +187,9 @@ def generate_report(date_str: str, checker_mod) -> str:
gitea_status, gitea_detail = _check_gitea_reachability()
rows.append(("Alpha VPS", gitea_status, gitea_detail))
llama_status, llama_detail = _check_llama_server()
rows.append(("Local LLM", llama_status, llama_detail))
sec_status, sec_detail = _check_world_readable_secrets()
rows.append(("Security", sec_status, sec_detail))

View File

@@ -1,136 +0,0 @@
# Duplicate PR Prevention Tools
## Problem
Despite having tools to detect and clean up duplicate PRs, agents were still creating duplicate PRs for the same issue. This was incredibly ironic, especially for issue #1128 which was about cleaning up duplicate PRs.
## Solution
We've created pre-flight check tools that agents should run **before** creating a new PR. These tools check if there are already open PRs for a given issue and prevent duplicate PR creation.
## Tools
### 1. `check-existing-prs.sh` (Bash)
Check if PRs already exist for an issue.
```bash
# Check for existing PRs for issue #1128
./scripts/check-existing-prs.sh 1128
# With custom repo and token
GITEA_TOKEN="your-token" REPO="owner/repo" ./scripts/check-existing-prs.sh 1128
```
**Exit codes:**
- `0`: No existing PRs found (safe to create new PR)
- `1`: Existing PRs found (do not create new PR)
- `2`: Error (API failure, missing parameters, etc.)
### 2. `check_existing_prs.py` (Python)
Python version of the check, suitable for agents that prefer Python.
```bash
# Check for existing PRs for issue #1128
python3 scripts/check_existing_prs.py 1128
# With custom repo and token
python3 scripts/check_existing_prs.py 1128 "owner/repo" "your-token"
```
### 3. `pr-safe.sh` (Wrapper)
User-friendly wrapper that provides guidance.
```bash
# Check and get suggestions
./scripts/pr-safe.sh 1128
# With suggested branch name
./scripts/pr-safe.sh 1128 fix/1128-my-fix
```
## Workflow Integration
### For Agents
Before creating a PR, agents should:
1. Run the check: `./scripts/check-existing-prs.sh <issue_number>`
2. If exit code is `0`, proceed with PR creation
3. If exit code is `1`, review existing PRs instead
### For Humans
Before creating a PR:
1. Run: `./scripts/pr-safe.sh <issue_number>`
2. Follow the guidance provided
## Prevention Strategy
### 1. Pre-flight Checks
Always run a pre-flight check before creating a PR:
```bash
# In your agent workflow
if ./scripts/check-existing-prs.sh $ISSUE_NUMBER; then
# Safe to create PR
create_pr
else
# Don't create PR, review existing ones
review_existing_prs
fi
```
### 2. GitHub Actions Integration
The existing `.github/workflows/pr-duplicate-check.yml` workflow can be enhanced to run these checks automatically.
### 3. Agent Instructions
Add to agent instructions:
```
Before creating a PR for an issue, ALWAYS run:
./scripts/check-existing-prs.sh <issue_number>
If PRs already exist, DO NOT create a new PR.
Instead, review existing PRs and add comments or merge them.
```
## Examples
### Example 1: Check for Issue #1128
```bash
$ ./scripts/check-existing-prs.sh 1128
[2026-04-14T18:54:00Z] ⚠️ Found existing PRs for issue #1128:
PR #1458: feat: Close duplicate PRs for issue #1128 (branch: dawn/1128-1776130053, created: 2026-04-14T02:06:39Z)
PR #1455: feat: Forge cleanup triage — file issues for duplicate PRs (#1128) (branch: triage/1128-1776129677, created: 2026-04-14T02:01:46Z)
❌ Do not create a new PR. Review existing PRs first.
```
### Example 2: Safe to Create PR
```bash
$ ./scripts/check-existing-prs.sh 9999
[2026-04-14T18:54:00Z] ✅ No existing PRs found for issue #9999
Safe to create a new PR
```
## Related Issues
- Issue #1474: [META] Still creating duplicate PRs for issue #1128 despite cleanup
- Issue #1460: [META] I keep creating duplicate PRs for issue #1128
- Issue #1128: [RESOLVED] Forge Cleanup — PRs Closed, Milestones Deduplicated, Policy Issues Filed
## Lessons Learned
1. **Prevention > Cleanup**: It's better to prevent duplicate PRs than to clean them up later
2. **Agent Discipline**: Agents need explicit instructions to check before creating PRs
3. **Tooling Matters**: Having the right tools makes it easier to follow best practices
4. **Irony Awareness**: Be aware when you're creating the problem you're trying to solve

View File

@@ -40,6 +40,9 @@ Standardizes local LLM inference across the fleet using llama.cpp.
curl -sf http://localhost:11435/health
curl -s http://localhost:11435/v1/models
Night Watch integration:
- `bin/night_watch.py` probes the local llama.cpp `/health` endpoint and surfaces failures in the nightly report.
## Troubleshooting
- Won't start → smaller model / lower quant

View File

@@ -1,78 +0,0 @@
#!/usr/bin/env bash
# ═══════════════════════════════════════════════════════════════
# check-existing-prs.sh — Check if PRs already exist for an issue
#
# This script checks if there are already open PRs for a given issue
# before creating a new one. This prevents duplicate PRs.
#
# Usage:
# ./scripts/check-existing-prs.sh <issue_number>
#
# Exit codes:
# 0 - No existing PRs found (safe to create new PR)
# 1 - Existing PRs found (do not create new PR)
# 2 - Error (API failure, missing parameters, etc.)
#
# Designed for issue #1474: Prevent duplicate PRs
# ═══════════════════════════════════════════════════════════════
set -euo pipefail
# ─── Configuration ──────────────────────────────────────────
GITEA_URL="${GITEA_URL:-https://forge.alexanderwhitestone.com}"
GITEA_TOKEN="${GITEA_TOKEN:?Set GITEA_TOKEN env var}"
REPO="${REPO:-Timmy_Foundation/the-nexus}"
ISSUE_NUMBER="${1:?Usage: $0 <issue_number>}"
API="$GITEA_URL/api/v1"
AUTH="Authorization: token $GITEA_TOKEN"
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] $*"; }
# ─── Validate inputs ──────────────────────────────────────
if ! [[ "$ISSUE_NUMBER" =~ ^[0-9]+$ ]]; then
log "ERROR: Issue number must be a positive integer"
exit 2
fi
# ─── Fetch open PRs ────────────────────────────────────────
log "Checking for existing PRs for issue #$ISSUE_NUMBER in $REPO"
OPEN_PRS=$(curl -s -H "$AUTH" "$API/repos/$REPO/pulls?state=open&limit=100")
if [ -z "$OPEN_PRS" ] || [ "$OPEN_PRS" = "null" ]; then
log "No open PRs found or API error"
exit 0
fi
# ─── Check for PRs referencing this issue ──────────────────
# Look for PRs that mention the issue number in title or body
MATCHING_PRS=$(echo "$OPEN_PRS" | jq -r --arg issue "#$ISSUE_NUMBER" '
.[] |
select(
(.title | test($issue; "i")) or
(.body | test($issue; "i"))
) |
"PR #\(.number): \(.title) (branch: \(.head.ref), created: \(.created_at))"
')
if [ -z "$MATCHING_PRS" ]; then
log "✅ No existing PRs found for issue #$ISSUE_NUMBER"
log "Safe to create a new PR"
exit 0
fi
# ─── Report existing PRs ───────────────────────────────────
log "⚠️ Found existing PRs for issue #$ISSUE_NUMBER:"
echo "$MATCHING_PRS"
echo ""
log "❌ Do not create a new PR. Review existing PRs first."
log ""
log "Options:"
log " 1. Review and merge an existing PR"
log " 2. Close duplicates and keep the best one"
log " 3. Add comments to existing PRs instead of creating new ones"
log ""
log "To see details of existing PRs:"
log " curl -H \"Authorization: token \$GITEA_TOKEN\" \"$API/repos/$REPO/pulls?state=open\" | jq '.[] | select(.title | test(\"#$ISSUE_NUMBER\"; \"i\"))'"
exit 1

View File

@@ -1,148 +0,0 @@
#!/usr/bin/env python3
"""
Check if PRs already exist for an issue before creating a new one.
This script prevents duplicate PRs by checking if there are already
open PRs for a given issue.
Usage:
python3 scripts/check_existing_prs.py <issue_number>
Exit codes:
0 - No existing PRs found (safe to create new PR)
1 - Existing PRs found (do not create new PR)
2 - Error (API failure, missing parameters, etc.)
Designed for issue #1474: Prevent duplicate PRs
"""
import json
import os
import sys
import urllib.request
import urllib.error
from datetime import datetime
def check_existing_prs(issue_number: int, repo: str = None, token: str = None) -> int:
"""
Check if PRs already exist for an issue.
Args:
issue_number: The issue number to check
repo: Repository in format "owner/repo" (default: from env or "Timmy_Foundation/the-nexus")
token: Gitea API token (default: from GITEA_TOKEN env var)
Returns:
0: No existing PRs found (safe to create new PR)
1: Existing PRs found (do not create new PR)
2: Error (API failure, missing parameters, etc.)
"""
# Get configuration from environment
gitea_url = os.environ.get('GITEA_URL', 'https://forge.alexanderwhitestone.com')
token = token or os.environ.get('GITEA_TOKEN')
repo = repo or os.environ.get('REPO', 'Timmy_Foundation/the-nexus')
if not token:
print("ERROR: GITEA_TOKEN environment variable not set", file=sys.stderr)
return 2
# Validate issue number
if not isinstance(issue_number, int) or issue_number <= 0:
print("ERROR: Issue number must be a positive integer", file=sys.stderr)
return 2
# Build API URL
api_url = f"{gitea_url}/api/v1/repos/{repo}/pulls?state=open&limit=100"
# Make API request
try:
req = urllib.request.Request(api_url, headers={
'Authorization': f'token {token}',
'Content-Type': 'application/json'
})
with urllib.request.urlopen(req, timeout=30) as resp:
prs = json.loads(resp.read())
except urllib.error.URLError as e:
print(f"ERROR: Failed to fetch PRs: {e}", file=sys.stderr)
return 2
except json.JSONDecodeError as e:
print(f"ERROR: Failed to parse API response: {e}", file=sys.stderr)
return 2
except Exception as e:
print(f"ERROR: Unexpected error: {e}", file=sys.stderr)
return 2
# Check for PRs referencing this issue
issue_ref = f"#{issue_number}"
matching_prs = []
for pr in prs:
title = pr.get('title', '')
body = pr.get('body', '') or ''
# Check if issue is referenced in title or body
if issue_ref in title or issue_ref in body:
matching_prs.append(pr)
# Report results
timestamp = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
if not matching_prs:
print(f"[{timestamp}] ✅ No existing PRs found for issue #{issue_number}")
print("Safe to create a new PR")
return 0
# Found existing PRs
print(f"[{timestamp}] ⚠️ Found existing PRs for issue #{issue_number}:")
print()
for pr in matching_prs:
pr_number = pr.get('number')
pr_title = pr.get('title')
pr_branch = pr.get('head', {}).get('ref', 'unknown')
pr_created = pr.get('created_at', 'unknown')
pr_url = pr.get('html_url', 'unknown')
print(f" PR #{pr_number}: {pr_title}")
print(f" Branch: {pr_branch}")
print(f" Created: {pr_created}")
print(f" URL: {pr_url}")
print()
print("❌ Do not create a new PR. Review existing PRs first.")
print()
print("Options:")
print(" 1. Review and merge an existing PR")
print(" 2. Close duplicates and keep the best one")
print(" 3. Add comments to existing PRs instead of creating new ones")
print()
print("To see details of existing PRs:")
print(f' curl -H "Authorization: token $GITEA_TOKEN" "{gitea_url}/api/v1/repos/{repo}/pulls?state=open" | jq \'.[] | select(.title | test("#{issue_number}"; "i"))\'')
return 1
def main():
"""Main entry point."""
if len(sys.argv) < 2:
print("Usage: python3 check_existing_prs.py <issue_number>", file=sys.stderr)
print(" python3 check_existing_prs.py <issue_number> [repo] [token]", file=sys.stderr)
return 2
try:
issue_number = int(sys.argv[1])
except ValueError:
print("ERROR: Issue number must be an integer", file=sys.stderr)
return 2
repo = sys.argv[2] if len(sys.argv) > 2 else None
token = sys.argv[3] if len(sys.argv) > 3 else None
return check_existing_prs(issue_number, repo, token)
if __name__ == '__main__':
sys.exit(main())

View File

@@ -1,48 +0,0 @@
#!/usr/bin/env bash
# ═══════════════════════════════════════════════════════════════
# pr-safe.sh — Safe PR creation wrapper
#
# This script checks for existing PRs before creating a new one.
# It's a wrapper around check-existing-prs.sh that provides
# a user-friendly interface.
#
# Usage:
# ./scripts/pr-safe.sh <issue_number> [branch_name]
#
# If branch_name is not provided, it will suggest one based on
# the issue number and current timestamp.
# ═══════════════════════════════════════════════════════════════
set -euo pipefail
ISSUE_NUMBER="${1:?Usage: $0 <issue_number> [branch_name]}"
BRANCH_NAME="${2:-}"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
echo "🔍 Checking for existing PRs for issue #$ISSUE_NUMBER..."
echo ""
# Run the check
if "$SCRIPT_DIR/check-existing-prs.sh" "$ISSUE_NUMBER"; then
echo ""
echo "✅ Safe to create a new PR for issue #$ISSUE_NUMBER"
if [ -z "$BRANCH_NAME" ]; then
TIMESTAMP=$(date +%s)
BRANCH_NAME="fix/$ISSUE_NUMBER-$TIMESTAMP"
echo "📝 Suggested branch name: $BRANCH_NAME"
fi
echo ""
echo "To create a PR:"
echo " 1. Create branch: git checkout -b $BRANCH_NAME"
echo " 2. Make your changes"
echo " 3. Commit: git commit -m 'fix: Description (#$ISSUE_NUMBER)'"
echo " 4. Push: git push -u origin $BRANCH_NAME"
echo " 5. Create PR via API or web interface"
else
echo ""
echo "❌ Cannot create new PR for issue #$ISSUE_NUMBER"
echo " Existing PRs found. Review them first."
exit 1
fi

View File

@@ -0,0 +1,75 @@
from __future__ import annotations
import json
import sys
from pathlib import Path
from unittest.mock import patch
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
class _FakeResponse:
def __init__(self, payload: dict):
self.payload = json.dumps(payload).encode()
def read(self):
return self.payload
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
class _FakeHeartbeatReport:
def to_panel_markdown(self):
return "## Heartbeat Panel\n\nAll jobs healthy."
class _FakeChecker:
@staticmethod
def build_report():
return _FakeHeartbeatReport()
def test_check_llama_server_reports_healthy_model():
import bin.night_watch as nw
with patch(
"bin.night_watch.urllib.request.urlopen",
return_value=_FakeResponse({"status": "ok", "model_path": "/opt/models/llama/Qwen2.5-7B-Instruct-Q4_K_M.gguf"}),
):
status, detail = nw._check_llama_server("http://127.0.0.1:11435")
assert status == "OK"
assert "127.0.0.1:11435" in detail
assert "Qwen2.5-7B-Instruct-Q4_K_M.gguf" in detail
def test_check_llama_server_reports_warning_on_failure():
import bin.night_watch as nw
with patch(
"bin.night_watch.urllib.request.urlopen",
side_effect=OSError("connection refused"),
):
status, detail = nw._check_llama_server("http://127.0.0.1:11435")
assert status == "WARN"
assert "connection refused" in detail
def test_generate_report_includes_local_llm_row():
import bin.night_watch as nw
with patch("bin.night_watch._check_service", return_value=("OK", "hermes-bezalel is active")), \
patch("bin.night_watch._check_disk", return_value=("OK", "disk usage 23%")), \
patch("bin.night_watch._check_memory", return_value=("OK", "memory usage 30%")), \
patch("bin.night_watch._check_gitea_reachability", return_value=("OK", "Gitea HTTPS is responding (200)")), \
patch("bin.night_watch._check_world_readable_secrets", return_value=("OK", "no sensitive recently-modified world-readable files found")), \
patch("bin.night_watch._check_llama_server", return_value=("OK", "llama-server healthy at http://127.0.0.1:11435 (Qwen2.5-7B-Instruct-Q4_K_M.gguf)")):
report = nw.generate_report("2026-04-15", _FakeChecker())
assert "| Local LLM | OK | llama-server healthy at http://127.0.0.1:11435 (Qwen2.5-7B-Instruct-Q4_K_M.gguf) |" in report
assert "## Heartbeat Panel" in report