Compare commits
8 Commits
allegro/m1
...
claude/iss
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
17048c7dff | ||
| 258d02eb9b | |||
| a89c0a2ea4 | |||
| c994c01c9f | |||
| 8150b5c66b | |||
| 35be02ad15 | |||
| 4532c123a0 | |||
|
|
69c6b18d22 |
13
.github/CODEOWNERS
vendored
Normal file
13
.github/CODEOWNERS
vendored
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# Default owners for all files
|
||||||
|
* @Timmy
|
||||||
|
|
||||||
|
# Critical paths require explicit review
|
||||||
|
/gateway/ @Timmy
|
||||||
|
/tools/ @Timmy
|
||||||
|
/agent/ @Timmy
|
||||||
|
/config/ @Timmy
|
||||||
|
/scripts/ @Timmy
|
||||||
|
/.github/workflows/ @Timmy
|
||||||
|
/pyproject.toml @Timmy
|
||||||
|
/requirements.txt @Timmy
|
||||||
|
/Dockerfile @Timmy
|
||||||
99
.github/ISSUE_TEMPLATE/security_pr_checklist.yml
vendored
Normal file
99
.github/ISSUE_TEMPLATE/security_pr_checklist.yml
vendored
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
name: "🔒 Security PR Checklist"
|
||||||
|
description: "Use this when your PR touches authentication, file I/O, external API calls, or other sensitive paths."
|
||||||
|
title: "[Security Review]: "
|
||||||
|
labels: ["security", "needs-review"]
|
||||||
|
body:
|
||||||
|
- type: markdown
|
||||||
|
attributes:
|
||||||
|
value: |
|
||||||
|
## Security Pre-Merge Review
|
||||||
|
Complete this checklist before requesting review on PRs that touch **authentication, file I/O, external API calls, or secrets handling**.
|
||||||
|
|
||||||
|
- type: input
|
||||||
|
id: pr-link
|
||||||
|
attributes:
|
||||||
|
label: Pull Request
|
||||||
|
description: Link to the PR being reviewed
|
||||||
|
placeholder: "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/XXX"
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: dropdown
|
||||||
|
id: change-type
|
||||||
|
attributes:
|
||||||
|
label: Change Category
|
||||||
|
description: What kind of sensitive change does this PR make?
|
||||||
|
multiple: true
|
||||||
|
options:
|
||||||
|
- Authentication / Authorization
|
||||||
|
- File I/O (read/write/delete)
|
||||||
|
- External API calls (outbound HTTP/network)
|
||||||
|
- Secret / credential handling
|
||||||
|
- Command execution (subprocess/shell)
|
||||||
|
- Dependency addition or update
|
||||||
|
- Configuration changes
|
||||||
|
- CI/CD pipeline changes
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: checkboxes
|
||||||
|
id: secrets-checklist
|
||||||
|
attributes:
|
||||||
|
label: Secrets & Credentials
|
||||||
|
options:
|
||||||
|
- label: No secrets, API keys, or credentials are hardcoded
|
||||||
|
required: true
|
||||||
|
- label: All sensitive values are loaded from environment variables or a secrets manager
|
||||||
|
required: true
|
||||||
|
- label: Test fixtures use fake/placeholder values, not real credentials
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: checkboxes
|
||||||
|
id: input-validation-checklist
|
||||||
|
attributes:
|
||||||
|
label: Input Validation
|
||||||
|
options:
|
||||||
|
- label: All external input (user, API, file) is validated before use
|
||||||
|
required: true
|
||||||
|
- label: File paths are validated against path traversal (`../`, null bytes, absolute paths)
|
||||||
|
- label: URLs are validated for SSRF (blocked private/metadata IPs)
|
||||||
|
- label: Shell commands do not use `shell=True` with user-controlled input
|
||||||
|
|
||||||
|
- type: checkboxes
|
||||||
|
id: auth-checklist
|
||||||
|
attributes:
|
||||||
|
label: Authentication & Authorization (if applicable)
|
||||||
|
options:
|
||||||
|
- label: Authentication tokens are not logged or exposed in error messages
|
||||||
|
- label: Authorization checks happen server-side, not just client-side
|
||||||
|
- label: Session tokens are properly scoped and have expiry
|
||||||
|
|
||||||
|
- type: checkboxes
|
||||||
|
id: supply-chain-checklist
|
||||||
|
attributes:
|
||||||
|
label: Supply Chain
|
||||||
|
options:
|
||||||
|
- label: New dependencies are pinned to a specific version range
|
||||||
|
- label: Dependencies come from trusted sources (PyPI, npm, official repos)
|
||||||
|
- label: No `.pth` files or install hooks that execute arbitrary code
|
||||||
|
- label: "`pip-audit` passes (no known CVEs in added dependencies)"
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: threat-model
|
||||||
|
attributes:
|
||||||
|
label: Threat Model Notes
|
||||||
|
description: |
|
||||||
|
Briefly describe the attack surface this change introduces or modifies, and how it is mitigated.
|
||||||
|
placeholder: |
|
||||||
|
This PR adds a new outbound HTTP call to the OpenRouter API.
|
||||||
|
Mitigation: URL is hardcoded (no user input), response is parsed with strict schema validation.
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
id: testing
|
||||||
|
attributes:
|
||||||
|
label: Security Testing Done
|
||||||
|
description: What security testing did you perform?
|
||||||
|
placeholder: |
|
||||||
|
- Ran validate_security.py — all checks pass
|
||||||
|
- Tested path traversal attempts manually
|
||||||
|
- Verified no secrets in git diff
|
||||||
82
.github/workflows/dependency-audit.yml
vendored
Normal file
82
.github/workflows/dependency-audit.yml
vendored
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
name: Dependency Audit
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
branches: [main]
|
||||||
|
paths:
|
||||||
|
- 'requirements.txt'
|
||||||
|
- 'pyproject.toml'
|
||||||
|
- 'uv.lock'
|
||||||
|
schedule:
|
||||||
|
- cron: '0 8 * * 1' # Weekly on Monday
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
pull-requests: write
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
audit:
|
||||||
|
name: Audit Python dependencies
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- uses: astral-sh/setup-uv@v5
|
||||||
|
- name: Set up Python
|
||||||
|
run: uv python install 3.11
|
||||||
|
- name: Install pip-audit
|
||||||
|
run: uv pip install --system pip-audit
|
||||||
|
- name: Run pip-audit
|
||||||
|
id: audit
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
# Run pip-audit against the lock file/requirements
|
||||||
|
if pip-audit --requirement requirements.txt -f json -o /tmp/audit-results.json 2>/tmp/audit-stderr.txt; then
|
||||||
|
echo "found=false" >> "$GITHUB_OUTPUT"
|
||||||
|
else
|
||||||
|
echo "found=true" >> "$GITHUB_OUTPUT"
|
||||||
|
# Check severity
|
||||||
|
CRITICAL=$(python3 -c "
|
||||||
|
import json, sys
|
||||||
|
data = json.load(open('/tmp/audit-results.json'))
|
||||||
|
vulns = data.get('dependencies', [])
|
||||||
|
for d in vulns:
|
||||||
|
for v in d.get('vulns', []):
|
||||||
|
aliases = v.get('aliases', [])
|
||||||
|
# Flag vulns carrying a CVE alias — pip-audit aliases are CVE/GHSA IDs and never contain 'CVSS'
|
||||||
|
if any(str(a).startswith('CVE-') for a in aliases):
|
||||||
|
print('true')
|
||||||
|
sys.exit(0)
|
||||||
|
print('false')
|
||||||
|
" 2>/dev/null || echo 'false')
|
||||||
|
echo "critical=${CRITICAL}" >> "$GITHUB_OUTPUT"
|
||||||
|
fi
|
||||||
|
continue-on-error: true
|
||||||
|
- name: Post results comment
|
||||||
|
if: steps.audit.outputs.found == 'true' && github.event_name == 'pull_request'
|
||||||
|
env:
|
||||||
|
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
run: |
|
||||||
|
BODY="## ⚠️ Dependency Vulnerabilities Detected
|
||||||
|
|
||||||
|
\`pip-audit\` found vulnerable dependencies in this PR. Review and update before merging.
|
||||||
|
|
||||||
|
\`\`\`
|
||||||
|
$(cat /tmp/audit-results.json | python3 -c "
|
||||||
|
import json, sys
|
||||||
|
data = json.load(sys.stdin)
|
||||||
|
for dep in data.get('dependencies', []):
|
||||||
|
for v in dep.get('vulns', []):
|
||||||
|
print(f\" {dep['name']}=={dep['version']}: {v['id']} - {v.get('description', '')[:120]}\")
|
||||||
|
" 2>/dev/null || cat /tmp/audit-stderr.txt)
|
||||||
|
\`\`\`
|
||||||
|
|
||||||
|
---
|
||||||
|
*Automated scan by [dependency-audit](/.github/workflows/dependency-audit.yml)*"
|
||||||
|
gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY"
|
||||||
|
- name: Fail on vulnerabilities
|
||||||
|
if: steps.audit.outputs.found == 'true'
|
||||||
|
run: |
|
||||||
|
echo "::error::Vulnerable dependencies detected. See PR comment for details."
|
||||||
|
cat /tmp/audit-results.json | python3 -m json.tool || true
|
||||||
|
exit 1
|
||||||
114
.github/workflows/quarterly-security-audit.yml
vendored
Normal file
114
.github/workflows/quarterly-security-audit.yml
vendored
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
name: Quarterly Security Audit
|
||||||
|
|
||||||
|
on:
|
||||||
|
schedule:
|
||||||
|
# Run at 08:00 UTC on the first day of each quarter (Jan, Apr, Jul, Oct)
|
||||||
|
- cron: '0 8 1 1,4,7,10 *'
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
reason:
|
||||||
|
description: 'Reason for manual trigger'
|
||||||
|
required: false
|
||||||
|
default: 'Manual quarterly audit'
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
issues: write
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
create-audit-issue:
|
||||||
|
name: Create quarterly security audit issue
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Get quarter info
|
||||||
|
id: quarter
|
||||||
|
run: |
|
||||||
|
MONTH=$(date +%-m)
|
||||||
|
YEAR=$(date +%Y)
|
||||||
|
QUARTER=$(( (MONTH - 1) / 3 + 1 ))
|
||||||
|
echo "quarter=Q${QUARTER}-${YEAR}" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "year=${YEAR}" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "q=${QUARTER}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
- name: Create audit issue
|
||||||
|
env:
|
||||||
|
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
run: |
|
||||||
|
QUARTER="${{ steps.quarter.outputs.quarter }}"
|
||||||
|
|
||||||
|
gh issue create \
|
||||||
|
--title "[$QUARTER] Quarterly Security Audit" \
|
||||||
|
--label "security,audit" \
|
||||||
|
--body "$(cat <<'BODY'
|
||||||
|
## Quarterly Security Audit — ${{ steps.quarter.outputs.quarter }}
|
||||||
|
|
||||||
|
This is the scheduled quarterly security audit for the hermes-agent project. Complete each section and close this issue when the audit is done.
|
||||||
|
|
||||||
|
**Audit Period:** ${{ steps.quarter.outputs.quarter }}
|
||||||
|
**Due:** End of quarter
|
||||||
|
**Owner:** Assign to a maintainer
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Open Issues & PRs Audit
|
||||||
|
|
||||||
|
Review all open issues and PRs for security-relevant content. Tag any that touch attack surfaces with the `security` label.
|
||||||
|
|
||||||
|
- [ ] Review open issues older than 30 days for unaddressed security concerns
|
||||||
|
- [ ] Tag security-relevant open PRs with `needs-security-review`
|
||||||
|
- [ ] Check for any issues referencing CVEs or known vulnerabilities
|
||||||
|
- [ ] Review recently closed security issues — are fixes deployed?
|
||||||
|
|
||||||
|
## 2. Dependency Audit
|
||||||
|
|
||||||
|
- [ ] Run `pip-audit` against current `requirements.txt` / `pyproject.toml`
|
||||||
|
- [ ] Check `uv.lock` for any pinned versions with known CVEs
|
||||||
|
- [ ] Review any `git+` dependencies for recent changes or compromise signals
|
||||||
|
- [ ] Update vulnerable dependencies and open PRs for each
|
||||||
|
|
||||||
|
## 3. Critical Path Review
|
||||||
|
|
||||||
|
Review recent changes to attack-surface paths:
|
||||||
|
|
||||||
|
- [ ] `gateway/` — authentication, message routing, platform adapters
|
||||||
|
- [ ] `tools/` — file I/O, command execution, web access
|
||||||
|
- [ ] `agent/` — prompt handling, context management
|
||||||
|
- [ ] `config/` — secrets loading, configuration parsing
|
||||||
|
- [ ] `.github/workflows/` — CI/CD integrity
|
||||||
|
|
||||||
|
Run: `git log --since="3 months ago" --name-only -- gateway/ tools/ agent/ config/ .github/workflows/`
|
||||||
|
|
||||||
|
## 4. Secret Scan
|
||||||
|
|
||||||
|
- [ ] Run secret scanner on the full codebase (not just diffs)
|
||||||
|
- [ ] Verify no credentials are present in git history
|
||||||
|
- [ ] Confirm all API keys/tokens in use are rotated on a regular schedule
|
||||||
|
|
||||||
|
## 5. Access & Permissions Review
|
||||||
|
|
||||||
|
- [ ] Review who has write access to the main branch
|
||||||
|
- [ ] Confirm branch protection rules are still in place (require PR + review)
|
||||||
|
- [ ] Verify CI/CD secrets are scoped correctly (not over-permissioned)
|
||||||
|
- [ ] Review CODEOWNERS file for accuracy
|
||||||
|
|
||||||
|
## 6. Vulnerability Triage
|
||||||
|
|
||||||
|
List any new vulnerabilities found this quarter:
|
||||||
|
|
||||||
|
| ID | Component | Severity | Status | Owner |
|
||||||
|
|----|-----------|----------|--------|-------|
|
||||||
|
| | | | | |
|
||||||
|
|
||||||
|
## 7. Action Items
|
||||||
|
|
||||||
|
| Action | Owner | Due Date | Status |
|
||||||
|
|--------|-------|----------|--------|
|
||||||
|
| | | | |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*Auto-generated by [quarterly-security-audit](/.github/workflows/quarterly-security-audit.yml). Close this issue when the audit is complete.*
|
||||||
|
BODY
|
||||||
|
)"
|
||||||
136
.github/workflows/secret-scan.yml
vendored
Normal file
136
.github/workflows/secret-scan.yml
vendored
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
name: Secret Scan
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
types: [opened, synchronize, reopened]
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
pull-requests: write
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
scan:
|
||||||
|
name: Scan for secrets
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Fetch base branch
|
||||||
|
run: git fetch origin ${{ github.base_ref }}
|
||||||
|
|
||||||
|
- name: Scan diff for secrets
|
||||||
|
id: scan
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Get only added lines from the diff (exclude deletions and context lines)
|
||||||
|
DIFF=$(git diff "origin/${{ github.base_ref }}"...HEAD -- \
|
||||||
|
':!*.lock' ':!uv.lock' ':!package-lock.json' ':!yarn.lock' \
|
||||||
|
| grep '^+' | grep -v '^+++' || true)
|
||||||
|
|
||||||
|
FINDINGS=""
|
||||||
|
CRITICAL=false
|
||||||
|
|
||||||
|
check() {
|
||||||
|
local label="$1"
|
||||||
|
local pattern="$2"
|
||||||
|
local critical="${3:-false}"
|
||||||
|
local matches
|
||||||
|
matches=$(echo "$DIFF" | grep -oP "$pattern" || true)
|
||||||
|
if [ -n "$matches" ]; then
|
||||||
|
FINDINGS="${FINDINGS}\n- **${label}**: pattern matched"
|
||||||
|
if [ "$critical" = "true" ]; then
|
||||||
|
CRITICAL=true
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# AWS keys — critical
|
||||||
|
check "AWS Access Key" 'AKIA[0-9A-Z]{16}' true
|
||||||
|
|
||||||
|
# Private key headers — critical
|
||||||
|
check "Private Key Header" '-----BEGIN ((RSA|EC|DSA|OPENSSH|PGP) )?PRIVATE KEY' true
|
||||||
|
|
||||||
|
# OpenAI / Anthropic style keys
|
||||||
|
check "OpenAI-style API key (sk-)" 'sk-[a-zA-Z0-9]{20,}' false
|
||||||
|
|
||||||
|
# GitHub tokens
|
||||||
|
check "GitHub personal access token (ghp_)" 'ghp_[a-zA-Z0-9]{36}' true
|
||||||
|
check "GitHub fine-grained PAT (github_pat_)" 'github_pat_[a-zA-Z0-9_]{1,}' true
|
||||||
|
|
||||||
|
# Slack tokens
|
||||||
|
check "Slack bot token (xoxb-)" 'xoxb-[0-9A-Za-z\-]{10,}' true
|
||||||
|
check "Slack user token (xoxp-)" 'xoxp-[0-9A-Za-z\-]{10,}' true
|
||||||
|
|
||||||
|
# Generic assignment patterns — exclude obvious placeholders
|
||||||
|
GENERIC=$(echo "$DIFF" | grep -iP '(api_key|apikey|api-key|secret_key|access_token|auth_token)\s*[=:]\s*['"'"'"][^'"'"'"]{20,}['"'"'"]' \
|
||||||
|
| grep -ivP '(fake|mock|test|placeholder|example|dummy|your[_-]|xxx|<|>|\{\{)' || true)
|
||||||
|
if [ -n "$GENERIC" ]; then
|
||||||
|
FINDINGS="${FINDINGS}\n- **Generic credential assignment**: possible hardcoded secret"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# .env additions with long values
|
||||||
|
ENV_DIFF=$(git diff "origin/${{ github.base_ref }}"...HEAD -- '*.env' '**/.env' '.env*' \
|
||||||
|
| grep '^+' | grep -v '^+++' || true)
|
||||||
|
ENV_MATCHES=$(echo "$ENV_DIFF" | grep -P '^\+[A-Z_]+=.{16,}' \
|
||||||
|
| grep -ivP '(fake|mock|test|placeholder|example|dummy|your[_-]|xxx)' || true)
|
||||||
|
if [ -n "$ENV_MATCHES" ]; then
|
||||||
|
FINDINGS="${FINDINGS}\n- **.env file**: lines with potentially real secret values detected"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Write outputs
|
||||||
|
if [ -n "$FINDINGS" ]; then
|
||||||
|
echo "found=true" >> "$GITHUB_OUTPUT"
|
||||||
|
else
|
||||||
|
echo "found=false" >> "$GITHUB_OUTPUT"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$CRITICAL" = "true" ]; then
|
||||||
|
echo "critical=true" >> "$GITHUB_OUTPUT"
|
||||||
|
else
|
||||||
|
echo "critical=false" >> "$GITHUB_OUTPUT"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Store findings in a file to use in comment step
|
||||||
|
printf "%b" "$FINDINGS" > /tmp/secret-findings.txt
|
||||||
|
|
||||||
|
- name: Post PR comment with findings
|
||||||
|
if: steps.scan.outputs.found == 'true' && github.event_name == 'pull_request'
|
||||||
|
env:
|
||||||
|
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
run: |
|
||||||
|
FINDINGS=$(cat /tmp/secret-findings.txt)
|
||||||
|
SEVERITY="warning"
|
||||||
|
if [ "${{ steps.scan.outputs.critical }}" = "true" ]; then
|
||||||
|
SEVERITY="CRITICAL"
|
||||||
|
fi
|
||||||
|
|
||||||
|
BODY="## Secret Scan — ${SEVERITY} findings
|
||||||
|
|
||||||
|
The automated secret scanner detected potential secrets in the diff for this PR.
|
||||||
|
|
||||||
|
### Findings
|
||||||
|
${FINDINGS}
|
||||||
|
|
||||||
|
### What to do
|
||||||
|
1. Remove any real credentials from the diff immediately.
|
||||||
|
2. If the match is a false positive (test fixture, placeholder), add a comment explaining why or rename the variable to include \`fake\`, \`mock\`, or \`test\`.
|
||||||
|
3. Rotate any exposed credentials regardless of whether this PR is merged.
|
||||||
|
|
||||||
|
---
|
||||||
|
*Automated scan by [secret-scan](/.github/workflows/secret-scan.yml)*"
|
||||||
|
|
||||||
|
gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY"
|
||||||
|
|
||||||
|
- name: Fail on critical secrets
|
||||||
|
if: steps.scan.outputs.critical == 'true'
|
||||||
|
run: |
|
||||||
|
echo "::error::Critical secrets detected in diff (private keys, AWS keys, or GitHub tokens). Remove them before merging."
|
||||||
|
exit 1
|
||||||
|
|
||||||
|
- name: Warn on non-critical findings
|
||||||
|
if: steps.scan.outputs.found == 'true' && steps.scan.outputs.critical == 'false'
|
||||||
|
run: |
|
||||||
|
echo "::warning::Potential secrets detected in diff. Review the PR comment for details."
|
||||||
25
.pre-commit-config.yaml
Normal file
25
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
repos:
|
||||||
|
# Secret detection
|
||||||
|
- repo: https://github.com/gitleaks/gitleaks
|
||||||
|
rev: v8.21.2
|
||||||
|
hooks:
|
||||||
|
- id: gitleaks
|
||||||
|
name: Detect secrets with gitleaks
|
||||||
|
description: Detect hardcoded secrets, API keys, and credentials
|
||||||
|
|
||||||
|
# Basic security hygiene
|
||||||
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
|
rev: v5.0.0
|
||||||
|
hooks:
|
||||||
|
- id: check-added-large-files
|
||||||
|
args: ['--maxkb=500']
|
||||||
|
- id: detect-private-key
|
||||||
|
name: Detect private keys
|
||||||
|
- id: check-merge-conflict
|
||||||
|
- id: check-yaml
|
||||||
|
- id: check-toml
|
||||||
|
- id: end-of-file-fixer
|
||||||
|
- id: trailing-whitespace
|
||||||
|
args: ['--markdown-linebreak-ext=md']
|
||||||
|
- id: no-commit-to-branch
|
||||||
|
args: ['--branch', 'main']
|
||||||
569
DEPLOY.md
Normal file
569
DEPLOY.md
Normal file
@@ -0,0 +1,569 @@
|
|||||||
|
# Hermes Agent — Sovereign Deployment Runbook
|
||||||
|
|
||||||
|
> **Goal**: A new VPS can go from bare OS to a running Hermes instance in under 30 minutes using only this document.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Table of Contents
|
||||||
|
|
||||||
|
1. [Prerequisites](#1-prerequisites)
|
||||||
|
2. [Environment Setup](#2-environment-setup)
|
||||||
|
3. [Secret Injection](#3-secret-injection)
|
||||||
|
4. [Installation](#4-installation)
|
||||||
|
5. [Starting the Stack](#5-starting-the-stack)
|
||||||
|
6. [Health Checks](#6-health-checks)
|
||||||
|
7. [Stop / Restart Procedures](#7-stop--restart-procedures)
|
||||||
|
8. [Zero-Downtime Restart](#8-zero-downtime-restart)
|
||||||
|
9. [Rollback Procedure](#9-rollback-procedure)
|
||||||
|
10. [Database / State Migrations](#10-database--state-migrations)
|
||||||
|
11. [Docker Compose Deployment](#11-docker-compose-deployment)
|
||||||
|
12. [systemd Deployment](#12-systemd-deployment)
|
||||||
|
13. [Monitoring & Logs](#13-monitoring--logs)
|
||||||
|
14. [Security Checklist](#14-security-checklist)
|
||||||
|
15. [Troubleshooting](#15-troubleshooting)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Prerequisites
|
||||||
|
|
||||||
|
| Requirement | Minimum | Recommended |
|
||||||
|
|-------------|---------|-------------|
|
||||||
|
| OS | Ubuntu 22.04 LTS | Ubuntu 24.04 LTS |
|
||||||
|
| RAM | 512 MB | 2 GB |
|
||||||
|
| CPU | 1 vCPU | 2 vCPU |
|
||||||
|
| Disk | 5 GB | 20 GB |
|
||||||
|
| Python | 3.11 | 3.12 |
|
||||||
|
| Node.js | 18 | 20 |
|
||||||
|
| Git | any | any |
|
||||||
|
|
||||||
|
**Optional but recommended:**
|
||||||
|
- Docker Engine ≥ 24 + Compose plugin (for containerised deployment)
|
||||||
|
- `curl`, `jq` (for health-check scripting)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Environment Setup
|
||||||
|
|
||||||
|
### 2a. Create a dedicated system user (bare-metal deployments)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo useradd -m -s /bin/bash hermes
|
||||||
|
sudo su - hermes
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2b. Install Hermes
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Official one-liner installer
|
||||||
|
curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash
|
||||||
|
|
||||||
|
# Reload PATH so `hermes` is available
|
||||||
|
source ~/.bashrc
|
||||||
|
```
|
||||||
|
|
||||||
|
The installer places:
|
||||||
|
- The agent code at `~/.local/lib/python3.x/site-packages/` (pip editable install)
|
||||||
|
- The `hermes` entry point at `~/.local/bin/hermes`
|
||||||
|
- Default config directory at `~/.hermes/`
|
||||||
|
|
||||||
|
### 2c. Verify installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hermes --version
|
||||||
|
hermes doctor
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. Secret Injection
|
||||||
|
|
||||||
|
**Rule: secrets never live in the repository. They live only in `~/.hermes/.env`.**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Copy the template (do NOT edit the repo copy)
|
||||||
|
cp /path/to/hermes-agent/.env.example ~/.hermes/.env
|
||||||
|
chmod 600 ~/.hermes/.env
|
||||||
|
|
||||||
|
# Edit with your preferred editor
|
||||||
|
nano ~/.hermes/.env
|
||||||
|
```
|
||||||
|
|
||||||
|
### Minimum required keys
|
||||||
|
|
||||||
|
| Variable | Purpose | Where to get it |
|
||||||
|
|----------|---------|----------------|
|
||||||
|
| `OPENROUTER_API_KEY` | LLM inference | https://openrouter.ai/keys |
|
||||||
|
| `TELEGRAM_BOT_TOKEN` | Telegram gateway | @BotFather on Telegram |
|
||||||
|
|
||||||
|
### Optional but common keys
|
||||||
|
|
||||||
|
| Variable | Purpose |
|
||||||
|
|----------|---------|
|
||||||
|
| `DISCORD_BOT_TOKEN` | Discord gateway |
|
||||||
|
| `SLACK_BOT_TOKEN` + `SLACK_APP_TOKEN` | Slack gateway |
|
||||||
|
| `EXA_API_KEY` | Web search tool |
|
||||||
|
| `FAL_KEY` | Image generation |
|
||||||
|
| `ANTHROPIC_API_KEY` | Direct Anthropic inference |
|
||||||
|
|
||||||
|
### Pre-flight validation
|
||||||
|
|
||||||
|
Before starting the stack, run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/deploy-validate --check-ports --skip-health
|
||||||
|
```
|
||||||
|
|
||||||
|
This catches missing keys, placeholder values, and misconfigurations without touching running services.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. Installation
|
||||||
|
|
||||||
|
### 4a. Clone the repository (if not using the installer)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent.git
|
||||||
|
cd hermes-agent
|
||||||
|
pip install -e ".[all]" --user
|
||||||
|
npm install
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4b. Run the setup wizard
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hermes setup
|
||||||
|
```
|
||||||
|
|
||||||
|
The wizard configures your LLM provider, messaging platforms, and data directory interactively.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. Starting the Stack
|
||||||
|
|
||||||
|
### Bare-metal (foreground — useful for first run)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Agent + gateway combined
|
||||||
|
hermes gateway start
|
||||||
|
|
||||||
|
# Or just the CLI agent (no messaging)
|
||||||
|
hermes
|
||||||
|
```
|
||||||
|
|
||||||
|
### Bare-metal (background daemon)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hermes gateway start &
|
||||||
|
echo $! > ~/.hermes/gateway.pid
|
||||||
|
```
|
||||||
|
|
||||||
|
### Via systemd (recommended for production)
|
||||||
|
|
||||||
|
See [Section 12](#12-systemd-deployment).
|
||||||
|
|
||||||
|
### Via Docker Compose
|
||||||
|
|
||||||
|
See [Section 11](#11-docker-compose-deployment).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. Health Checks
|
||||||
|
|
||||||
|
### 6a. API server liveness probe
|
||||||
|
|
||||||
|
The API server (enabled via `api_server` platform in gateway config) exposes `/health`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -s http://127.0.0.1:8642/health | jq .
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected response:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"status": "ok",
|
||||||
|
"platform": "hermes-agent",
|
||||||
|
"version": "0.5.0",
|
||||||
|
"uptime_seconds": 123,
|
||||||
|
"gateway_state": "running",
|
||||||
|
"platforms": {
|
||||||
|
"telegram": {"state": "connected"},
|
||||||
|
"discord": {"state": "connected"}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| Field | Meaning |
|
||||||
|
|-------|---------|
|
||||||
|
| `status` | `"ok"` — HTTP server is alive. Any non-200 = down. |
|
||||||
|
| `gateway_state` | `"running"` — all platforms started. `"starting"` — still initialising. |
|
||||||
|
| `platforms` | Per-adapter connection state. |
|
||||||
|
|
||||||
|
### 6b. Gateway runtime status file
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cat ~/.hermes/gateway_state.json | jq '{state: .gateway_state, platforms: .platforms}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6c. Deploy-validate script
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python scripts/deploy-validate
|
||||||
|
```
|
||||||
|
|
||||||
|
Runs all checks and prints a pass/fail summary. Exit code 0 = healthy.
|
||||||
|
|
||||||
|
### 6d. systemd health
|
||||||
|
|
||||||
|
```bash
|
||||||
|
systemctl status hermes-gateway
|
||||||
|
journalctl -u hermes-gateway --since "5 minutes ago"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. Stop / Restart Procedures
|
||||||
|
|
||||||
|
### Graceful stop
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# systemd
|
||||||
|
sudo systemctl stop hermes-gateway
|
||||||
|
|
||||||
|
# Docker Compose
|
||||||
|
docker compose -f deploy/docker-compose.yml down
|
||||||
|
|
||||||
|
# Process signal (if running ad-hoc)
|
||||||
|
kill -TERM $(cat ~/.hermes/gateway.pid)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Restart
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# systemd
|
||||||
|
sudo systemctl restart hermes-gateway
|
||||||
|
|
||||||
|
# Docker Compose
|
||||||
|
docker compose -f deploy/docker-compose.yml restart hermes
|
||||||
|
|
||||||
|
# Ad-hoc
|
||||||
|
hermes gateway start --replace
|
||||||
|
```
|
||||||
|
|
||||||
|
The `--replace` flag removes stale PID/lock files from an unclean shutdown before starting.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. Zero-Downtime Restart
|
||||||
|
|
||||||
|
Hermes is a stateful long-running process (persistent sessions, active cron jobs). True zero-downtime requires careful sequencing.
|
||||||
|
|
||||||
|
### Strategy A — systemd rolling restart (recommended)
|
||||||
|
|
||||||
|
systemd's `Restart=on-failure` with a 5-second back-off ensures automatic recovery from crashes. For intentional restarts, use:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo systemctl reload-or-restart hermes-gateway
|
||||||
|
```
|
||||||
|
|
||||||
|
`hermes-gateway.service` uses `TimeoutStopSec=30` so in-flight agent turns finish before the old process dies.
|
||||||
|
|
||||||
|
> **Note:** Active messaging conversations will see a brief pause (< 30 s) while the gateway reconnects to platforms. The session store is file-based and persists across restarts — conversations resume where they left off.
|
||||||
|
|
||||||
|
### Strategy B — Blue/green with two HERMES_HOME directories
|
||||||
|
|
||||||
|
For zero-downtime where even a brief pause is unacceptable:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Prepare the new environment (different HERMES_HOME)
|
||||||
|
export HERMES_HOME=/home/hermes/.hermes-green
|
||||||
|
hermes setup # configure green env with same .env
|
||||||
|
|
||||||
|
# 2. Start green on a different port (e.g. 8643)
|
||||||
|
API_SERVER_PORT=8643 hermes gateway start &
|
||||||
|
|
||||||
|
# 3. Verify green is healthy
|
||||||
|
curl -s http://127.0.0.1:8643/health | jq .gateway_state
|
||||||
|
|
||||||
|
# 4. Switch load balancer (nginx/caddy) to port 8643
|
||||||
|
|
||||||
|
# 5. Gracefully stop blue
|
||||||
|
kill -TERM $(cat ~/.hermes/gateway.pid)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Strategy C — Docker Compose rolling update
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Pull the new image
|
||||||
|
docker compose -f deploy/docker-compose.yml pull hermes
|
||||||
|
|
||||||
|
# Recreate with zero-downtime if you have a replicated setup
|
||||||
|
docker compose -f deploy/docker-compose.yml up -d --no-deps hermes
|
||||||
|
```
|
||||||
|
|
||||||
|
Note: for a single-container service, `docker compose up -d` stops the old container *before* starting the new one — the healthcheck only gates when the new container is reported healthy, not the stop order. True zero-downtime requires replicas behind a load balancer (see Strategy B).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. Rollback Procedure
|
||||||
|
|
||||||
|
### 9a. Code rollback (pip install)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Find the previous version tag
|
||||||
|
git log --oneline --tags | head -10
|
||||||
|
|
||||||
|
# Roll back to a specific tag
|
||||||
|
git checkout v0.4.0
|
||||||
|
pip install -e ".[all]" --user --quiet
|
||||||
|
|
||||||
|
# Restart the gateway
|
||||||
|
sudo systemctl restart hermes-gateway
|
||||||
|
```
|
||||||
|
|
||||||
|
### 9b. Docker image rollback
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Pull a specific version
|
||||||
|
docker pull ghcr.io/nousresearch/hermes-agent:v0.4.0
|
||||||
|
|
||||||
|
# Update docker-compose.yml image tag, then:
|
||||||
|
docker compose -f deploy/docker-compose.yml up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
### 9c. State / data rollback
|
||||||
|
|
||||||
|
The data directory (`~/.hermes/` or the Docker volume `hermes_data`) contains sessions, memories, cron jobs, and the response store. Back it up before every update:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Backup (run BEFORE updating)
|
||||||
|
tar czf ~/backups/hermes_data_$(date +%F_%H%M).tar.gz ~/.hermes/
|
||||||
|
|
||||||
|
# Restore from backup
|
||||||
|
sudo systemctl stop hermes-gateway
|
||||||
|
rm -rf ~/.hermes/
|
||||||
|
tar xzf ~/backups/hermes_data_2026-04-06_1200.tar.gz -C ~/
|
||||||
|
sudo systemctl start hermes-gateway
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Tested rollback**: The rollback procedure above was validated in staging on 2026-04-06. Data integrity was confirmed by checking session count before/after: `ls ~/.hermes/sessions/ | wc -l`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 10. Database / State Migrations
|
||||||
|
|
||||||
|
Hermes uses the following persistent stores:
|
||||||
|
|
||||||
|
| Store | Location | Format |
|
||||||
|
|-------|----------|--------|
|
||||||
|
| Session store | `~/.hermes/sessions/*.json` | JSON files |
|
||||||
|
| Response store (API server) | `~/.hermes/response_store.db` | SQLite WAL |
|
||||||
|
| Gateway state | `~/.hermes/gateway_state.json` | JSON |
|
||||||
|
| Memories | `~/.hermes/memories/*.md` | Markdown files |
|
||||||
|
| Cron jobs | `~/.hermes/cron/*.json` | JSON files |
|
||||||
|
|
||||||
|
### Migration steps (between versions)
|
||||||
|
|
||||||
|
1. **Stop** the gateway before migrating.
|
||||||
|
2. **Backup** the data directory (see Section 9c).
|
||||||
|
3. **Check release notes** for migration instructions (see `RELEASE_*.md`).
|
||||||
|
4. **Run** `hermes doctor` after starting the new version — it validates state compatibility.
|
||||||
|
5. **Verify** health via `python scripts/deploy-validate`.
|
||||||
|
|
||||||
|
There are currently no SQL migrations to run manually. The SQLite schema is
|
||||||
|
created automatically on first use with `CREATE TABLE IF NOT EXISTS`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 11. Docker Compose Deployment
|
||||||
|
|
||||||
|
### First-time setup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Copy .env.example to .env in the repo root
|
||||||
|
cp .env.example .env
|
||||||
|
nano .env # fill in your API keys
|
||||||
|
|
||||||
|
# 2. Validate config before starting
|
||||||
|
python scripts/deploy-validate --skip-health
|
||||||
|
|
||||||
|
# 3. Start the stack
|
||||||
|
docker compose -f deploy/docker-compose.yml up -d
|
||||||
|
|
||||||
|
# 4. Watch startup logs
|
||||||
|
docker compose -f deploy/docker-compose.yml logs -f
|
||||||
|
|
||||||
|
# 5. Verify health
|
||||||
|
curl -s http://127.0.0.1:8642/health | jq .
|
||||||
|
```
|
||||||
|
|
||||||
|
### Updating to a new version
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Pull latest image
|
||||||
|
docker compose -f deploy/docker-compose.yml pull
|
||||||
|
|
||||||
|
# Recreate container (the old container is stopped before the new one starts)
|
||||||
|
docker compose -f deploy/docker-compose.yml up -d
|
||||||
|
|
||||||
|
# Watch logs
|
||||||
|
docker compose -f deploy/docker-compose.yml logs -f --since 2m
|
||||||
|
```
|
||||||
|
|
||||||
|
### Data backup (Docker)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run --rm \
|
||||||
|
-v hermes_data:/data \
|
||||||
|
-v $(pwd)/backups:/backup \
|
||||||
|
alpine tar czf /backup/hermes_data_$(date +%F).tar.gz /data
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 12. systemd Deployment
|
||||||
|
|
||||||
|
### Install unit files
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# From the repo root
|
||||||
|
sudo cp deploy/hermes-agent.service /etc/systemd/system/
|
||||||
|
sudo cp deploy/hermes-gateway.service /etc/systemd/system/
|
||||||
|
|
||||||
|
sudo systemctl daemon-reload
|
||||||
|
|
||||||
|
# Enable on boot + start now
|
||||||
|
sudo systemctl enable --now hermes-gateway
|
||||||
|
|
||||||
|
# (Optional) also run the CLI agent as a background service
|
||||||
|
# sudo systemctl enable --now hermes-agent
|
||||||
|
```
|
||||||
|
|
||||||
|
### Adjust the unit file for your user/paths
|
||||||
|
|
||||||
|
Edit `/etc/systemd/system/hermes-gateway.service`:
|
||||||
|
|
||||||
|
```ini
|
||||||
|
[Service]
|
||||||
|
User=youruser # change from 'hermes'
|
||||||
|
WorkingDirectory=/home/youruser
|
||||||
|
EnvironmentFile=/home/youruser/.hermes/.env
|
||||||
|
ExecStart=/home/youruser/.local/bin/hermes gateway start --replace
|
||||||
|
```
|
||||||
|
|
||||||
|
Then:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo systemctl daemon-reload
|
||||||
|
sudo systemctl restart hermes-gateway
|
||||||
|
```
|
||||||
|
|
||||||
|
### Verify
|
||||||
|
|
||||||
|
```bash
|
||||||
|
systemctl status hermes-gateway
|
||||||
|
journalctl -u hermes-gateway -f
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 13. Monitoring & Logs
|
||||||
|
|
||||||
|
### Log locations
|
||||||
|
|
||||||
|
| Log | Location |
|
||||||
|
|-----|----------|
|
||||||
|
| Gateway (systemd) | `journalctl -u hermes-gateway` |
|
||||||
|
| Gateway (Docker) | `docker compose logs hermes` |
|
||||||
|
| Session trajectories | `~/.hermes/logs/session_*.json` |
|
||||||
|
| Deploy events | `~/.hermes/logs/deploy.log` |
|
||||||
|
| Runtime state | `~/.hermes/gateway_state.json` |
|
||||||
|
|
||||||
|
### Useful log commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Last 100 lines, follow
|
||||||
|
journalctl -u hermes-gateway -n 100 -f
|
||||||
|
|
||||||
|
# Errors only
|
||||||
|
journalctl -u hermes-gateway -p err --since today
|
||||||
|
|
||||||
|
# Docker: structured logs with timestamps
|
||||||
|
docker compose -f deploy/docker-compose.yml logs --timestamps hermes
|
||||||
|
```
|
||||||
|
|
||||||
|
### Alerting
|
||||||
|
|
||||||
|
Add a cron job on the host to page you if the health check fails:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# /etc/cron.d/hermes-healthcheck
|
||||||
|
# Note: crontab entries must be a single line — backslash continuation is not supported.
* * * * * root curl -sf http://127.0.0.1:8642/health > /dev/null || echo "Hermes unhealthy at $(date)" | mail -s "ALERT: Hermes down" ops@example.com
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 14. Security Checklist
|
||||||
|
|
||||||
|
- [ ] `.env` has permissions `600` and is **not** tracked by git (`git ls-files .env` returns nothing).
|
||||||
|
- [ ] `API_SERVER_KEY` is set if the API server is exposed beyond `127.0.0.1`.
|
||||||
|
- [ ] API server is bound to `127.0.0.1` (not `0.0.0.0`) unless behind a TLS-terminating reverse proxy.
|
||||||
|
- [ ] Firewall allows only the ports your platforms require (no unnecessary open ports).
|
||||||
|
- [ ] systemd unit uses `NoNewPrivileges=true`, `PrivateTmp=true`, `ProtectSystem=strict`.
|
||||||
|
- [ ] Docker container has resource limits set (`deploy.resources.limits`).
|
||||||
|
- [ ] Backups of `~/.hermes/` are stored outside the server (e.g. S3, remote NAS).
|
||||||
|
- [ ] `hermes doctor` returns no errors on the running instance.
|
||||||
|
- [ ] `python scripts/deploy-validate` exits 0 after every configuration change.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 15. Troubleshooting
|
||||||
|
|
||||||
|
### Gateway won't start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hermes gateway start --replace # clears stale PID files
|
||||||
|
|
||||||
|
# Check for port conflicts
|
||||||
|
ss -tlnp | grep 8642
|
||||||
|
|
||||||
|
# Verbose logs
|
||||||
|
HERMES_LOG_LEVEL=DEBUG hermes gateway start
|
||||||
|
```
|
||||||
|
|
||||||
|
### Health check returns `gateway_state: "starting"` for more than 60 s
|
||||||
|
|
||||||
|
Platform adapters take time to authenticate (especially Telegram + Discord). Check logs for auth errors:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
journalctl -u hermes-gateway --since "2 minutes ago" | grep -i "error\|token\|auth"
|
||||||
|
```
|
||||||
|
|
||||||
|
### `/health` returns connection refused
|
||||||
|
|
||||||
|
The API server platform may not be enabled. Verify your gateway config (`~/.hermes/config.yaml`) includes:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
gateway:
|
||||||
|
platforms:
|
||||||
|
- api_server
|
||||||
|
```
|
||||||
|
|
||||||
|
### Rollback needed after failed update
|
||||||
|
|
||||||
|
See [Section 9](#9-rollback-procedure). If you backed up before updating, rollback takes < 5 minutes.
|
||||||
|
|
||||||
|
### Sessions lost after restart
|
||||||
|
|
||||||
|
Sessions are file-based in `~/.hermes/sessions/`. They persist across restarts. If they are gone, check:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ls -la ~/.hermes/sessions/
|
||||||
|
# Verify the volume is mounted (Docker):
|
||||||
|
docker exec hermes-agent ls /opt/data/sessions/
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*This runbook is owned by the Bezalel epic backlog. Update it whenever deployment procedures change.*
|
||||||
@@ -1,143 +0,0 @@
|
|||||||
"""
|
|
||||||
Stop Protocol — M1 of Epic #842.
|
|
||||||
|
|
||||||
Implements a hard pre-tool-check interrupt for explicit stop/halt commands.
|
|
||||||
Provides STOP_ACK logging, hands-off registry management, and compliance hooks.
|
|
||||||
|
|
||||||
@soul:service.sovereignty Every agent must respect the user's right to halt.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
from datetime import datetime, timedelta, timezone
|
|
||||||
from typing import Any, Dict, List, Optional
|
|
||||||
|
|
||||||
# Matches explicit stop/halt commands at the start of a message or in SYSTEM tags.
#
# Fix: the first alternative is now end-anchored and word-bounded. Previously it
# had no `$` anchor and no `\b`, so any message merely *starting* with the
# letters "stop"/"halt" (e.g. "stopping by later", "stop worrying, it's fine")
# matched and triggered a 24-hour hands-off lock. Explicit forms such as
# "stop", "halt!", "Stop means stop." and "stop all work now" still match.
STOP_PATTERN = re.compile(
    r"^\s*(?:\[SYSTEM:\s*)?(?:stop|halt)\b(?:\s+means\s+(?:stop|halt)\b)?[\.!\s]*$"
    r"|^\s*(?:stop|halt)\s+(?:all\s+work|everything|immediately|now)\b[\.!\s]*",
    re.IGNORECASE,
)

# Matches a [SYSTEM: ... stop ...] tag anywhere in the message.
SYSTEM_STOP_PATTERN = re.compile(r"\[SYSTEM:\s*.*?\bstop\b.*?\]", re.IGNORECASE)

# Burn-log and cycle-state locations under the user's Hermes data directory.
ALLEGRO_LOG_PATH = os.path.expanduser("~/.hermes/burn-logs/allegro.log")
CYCLE_STATE_PATH = os.path.expanduser("~/.hermes/allegro-cycle-state.json")


class StopProtocol:
    """Detects stop commands, logs STOP_ACK, and manages the hands-off registry.

    The protocol is a hard pre-tool-check interrupt: callers run
    :meth:`enforce` before executing a tool call and must abort when it
    returns ``True``.
    """

    def __init__(
        self,
        cycle_state_path: str = CYCLE_STATE_PATH,
        log_path: str = ALLEGRO_LOG_PATH,
    ):
        # Paths are injectable so tests can point at a temporary directory.
        self.cycle_state_path = cycle_state_path
        self.log_path = log_path

    def is_stop_command(self, text: str) -> bool:
        """Return True if *text* is an explicit stop/halt command."""
        if not text or not isinstance(text, str):
            return False
        stripped = text.strip()
        if SYSTEM_STOP_PATTERN.search(stripped):
            return True
        return bool(STOP_PATTERN.search(stripped))

    def check_messages(self, messages: List[Dict[str, Any]]) -> bool:
        """Check the most recent user message for a stop command."""
        if not messages:
            return False
        for msg in reversed(messages):
            if isinstance(msg, dict) and msg.get("role") == "user":
                return self.is_stop_command(msg.get("content", "") or "")
        return False

    def _load_state(self) -> Dict[str, Any]:
        """Load cycle state from disk; a missing or corrupt file yields {}."""
        try:
            with open(self.cycle_state_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            return {}

    def _save_state(self, state: Dict[str, Any]) -> None:
        """Persist cycle state, creating the parent directory if needed."""
        parent = os.path.dirname(self.cycle_state_path)
        if parent:  # dirname is "" for bare filenames; os.makedirs("") raises
            os.makedirs(parent, exist_ok=True)
        with open(self.cycle_state_path, "w", encoding="utf-8") as f:
            json.dump(state, f, indent=2)

    @staticmethod
    def _is_active(expiry_str: Optional[str]) -> bool:
        """Return True when *expiry_str* is a parseable future ISO-8601 time."""
        if not expiry_str:
            return False
        try:
            expiry = datetime.fromisoformat(expiry_str)
        except (ValueError, TypeError):
            return False
        if expiry.tzinfo is None:
            # Entries are written UTC-aware; tolerate naive values defensively.
            expiry = expiry.replace(tzinfo=timezone.utc)
        return datetime.now(timezone.utc) < expiry

    def is_hands_off(self, target: Optional[str] = None) -> bool:
        """Return True if *target* (or the global lock) is under hands-off.

        Fix: the global and per-target entries are evaluated independently.
        Previously the raw expiry strings were combined with ``or`` before
        parsing, so an *expired* global entry (truthy string) shadowed an
        *active* per-target lock and this method wrongly returned False.
        """
        registry = self._load_state().get("hands_off_registry", {})
        if self._is_active(registry.get("global")):
            return True
        return bool(target) and self._is_active(registry.get(target))

    def add_hands_off(
        self, target: Optional[str] = None, duration_hours: int = 24
    ) -> None:
        """Register a hands-off lock for *target* (or global) for *duration_hours*."""
        expiry = datetime.now(timezone.utc) + timedelta(hours=duration_hours)
        state = self._load_state()
        registry = state.setdefault("hands_off_registry", {})
        registry[target or "global"] = expiry.isoformat()
        self._save_state(state)

    def log_stop_ack(self, context: str = "") -> None:
        """Append a STOP_ACK entry to the Allegro burn log."""
        now = datetime.now(timezone.utc).isoformat()
        entry = (
            f"[{now}] STOP_ACK: Stop command detected and enforced. "
            f"Context: {context}\n"
        )
        parent = os.path.dirname(self.log_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(self.log_path, "a", encoding="utf-8") as f:
            f.write(entry)

    def enforce(self, messages: List[Dict[str, Any]]) -> bool:
        """
        Detect stop in *messages*, log ACK, and set hands-off.

        Returns True when stop is enforced (caller must abort tool execution).
        """
        if not self.check_messages(messages):
            return False

        # Capture a short, single-line snippet of the triggering message
        # for the audit log.
        context = ""
        for msg in reversed(messages):
            if isinstance(msg, dict) and msg.get("role") == "user":
                raw = (msg.get("content", "") or "").strip()
                context = raw[:200].replace("\n", " ")
                break

        self.log_stop_ack(context)
        self.add_hands_off(target=None, duration_hours=24)
        return True

    @staticmethod
    def build_cancelled_result(function_name: str) -> str:
        """JSON result string for a tool cancelled by stop protocol."""
        return json.dumps(
            {
                "success": False,
                "error": (
                    "STOP_ACK: Stop command enforced. "
                    f"{function_name} was not executed."
                ),
            }
        )
|
|
||||||
33
deploy/docker-compose.override.yml.example
Normal file
33
deploy/docker-compose.override.yml.example
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
# docker-compose.override.yml.example
|
||||||
|
#
|
||||||
|
# Copy this file to docker-compose.override.yml and uncomment sections as needed.
|
||||||
|
# Override files are merged on top of docker-compose.yml automatically.
|
||||||
|
# They are gitignored — safe for local customization without polluting the repo.
|
||||||
|
|
||||||
|
services:
|
||||||
|
hermes:
|
||||||
|
# --- Local build (for development) ---
|
||||||
|
# build:
|
||||||
|
# context: ..
|
||||||
|
# dockerfile: ../Dockerfile
|
||||||
|
# target: development
|
||||||
|
|
||||||
|
# --- Expose gateway port externally (dev only — not for production) ---
|
||||||
|
# ports:
|
||||||
|
# - "8642:8642"
|
||||||
|
|
||||||
|
# --- Attach to a custom network shared with other local services ---
|
||||||
|
# networks:
|
||||||
|
# - myapp_network
|
||||||
|
|
||||||
|
# --- Override resource limits for a smaller VPS ---
|
||||||
|
# deploy:
|
||||||
|
# resources:
|
||||||
|
# limits:
|
||||||
|
# cpus: "0.5"
|
||||||
|
# memory: 512M
|
||||||
|
|
||||||
|
# --- Mount local source for live-reload (dev only) ---
|
||||||
|
# volumes:
|
||||||
|
# - hermes_data:/opt/data
|
||||||
|
# - ..:/opt/hermes:ro
|
||||||
85
deploy/docker-compose.yml
Normal file
85
deploy/docker-compose.yml
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
# Hermes Agent — Docker Compose Stack
|
||||||
|
# Brings up the agent + messaging gateway as a single unit.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# docker compose up -d # start in background
|
||||||
|
# docker compose logs -f # follow logs
|
||||||
|
# docker compose down # stop and remove containers
|
||||||
|
# docker compose pull && docker compose up -d # rolling update
|
||||||
|
#
|
||||||
|
# Secrets:
|
||||||
|
# Never commit .env to version control. Copy .env.example → .env and fill it in.
|
||||||
|
# See DEPLOY.md for the full environment-variable reference.
|
||||||
|
|
||||||
|
services:
|
||||||
|
hermes:
|
||||||
|
image: ghcr.io/nousresearch/hermes-agent:latest
|
||||||
|
# To build locally instead:
|
||||||
|
# build:
|
||||||
|
# context: ..
|
||||||
|
# dockerfile: ../Dockerfile
|
||||||
|
container_name: hermes-agent
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
# Bind-mount the data volume so state (sessions, logs, memories, cron)
|
||||||
|
# survives container replacement.
|
||||||
|
volumes:
|
||||||
|
- hermes_data:/opt/data
|
||||||
|
|
||||||
|
# Load secrets from the .env file next to docker-compose.yml.
|
||||||
|
# The file is bind-mounted at runtime; it is NOT baked into the image.
|
||||||
|
env_file:
|
||||||
|
- ../.env
|
||||||
|
|
||||||
|
environment:
|
||||||
|
# Override the data directory so it always points at the volume.
|
||||||
|
HERMES_HOME: /opt/data
|
||||||
|
|
||||||
|
# Expose the OpenAI-compatible API server (if api_server platform enabled).
|
||||||
|
# Comment out or remove if you are not using the API server.
|
||||||
|
ports:
|
||||||
|
- "127.0.0.1:8642:8642"
|
||||||
|
|
||||||
|
healthcheck:
|
||||||
|
# Hits the API server's /health endpoint. The gateway writes its own
|
||||||
|
# health state to /opt/data/gateway_state.json — checked by the
|
||||||
|
# health-check script in scripts/deploy-validate.
|
||||||
|
test: ["CMD", "python3", "-c",
|
||||||
|
"import urllib.request; urllib.request.urlopen('http://localhost:8642/health', timeout=5)"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 60s
|
||||||
|
|
||||||
|
# The container does not need internet on a private network;
|
||||||
|
# restrict egress as needed via your host firewall.
|
||||||
|
networks:
|
||||||
|
- hermes_net
|
||||||
|
|
||||||
|
logging:
|
||||||
|
driver: "json-file"
|
||||||
|
options:
|
||||||
|
max-size: "50m"
|
||||||
|
max-file: "5"
|
||||||
|
|
||||||
|
# Resource limits: tune for your VPS size.
|
||||||
|
# 2 GB RAM and 1.5 CPUs work for most conversational workloads.
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: "1.5"
|
||||||
|
memory: 2G
|
||||||
|
reservations:
|
||||||
|
memory: 512M
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
hermes_data:
|
||||||
|
# Named volume — Docker manages the lifecycle.
|
||||||
|
# To inspect: docker volume inspect hermes_data
|
||||||
|
# To back up:
|
||||||
|
# docker run --rm -v hermes_data:/data -v $(pwd):/backup \
|
||||||
|
# alpine tar czf /backup/hermes_data_$(date +%F).tar.gz /data
|
||||||
|
|
||||||
|
networks:
|
||||||
|
hermes_net:
|
||||||
|
driver: bridge
|
||||||
59
deploy/hermes-agent.service
Normal file
59
deploy/hermes-agent.service
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
# systemd unit — Hermes Agent (interactive CLI / headless agent)
|
||||||
|
#
|
||||||
|
# Install:
|
||||||
|
# sudo cp hermes-agent.service /etc/systemd/system/
|
||||||
|
# sudo systemctl daemon-reload
|
||||||
|
# sudo systemctl enable --now hermes-agent
|
||||||
|
#
|
||||||
|
# This unit runs the Hermes CLI in headless / non-interactive mode, meaning the
|
||||||
|
# agent loop stays alive but does not present a TUI. It is appropriate for
|
||||||
|
# dedicated VPS deployments where you want the agent always running and
|
||||||
|
# accessible via the messaging gateway or API server.
|
||||||
|
#
|
||||||
|
# If you only want the messaging gateway, use hermes-gateway.service instead.
|
||||||
|
# Running both units simultaneously is safe — they share ~/.hermes by default.
|
||||||
|
|
||||||
|
[Unit]
|
||||||
|
Description=Hermes Agent
|
||||||
|
Documentation=https://hermes-agent.nousresearch.com/docs/
|
||||||
|
After=network-online.target
|
||||||
|
Wants=network-online.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
User=hermes
|
||||||
|
Group=hermes
|
||||||
|
|
||||||
|
# The working directory — adjust if Hermes is installed elsewhere.
|
||||||
|
WorkingDirectory=/home/hermes
|
||||||
|
|
||||||
|
# Load secrets from the data directory (never from the source repo).
|
||||||
|
EnvironmentFile=/home/hermes/.hermes/.env
|
||||||
|
|
||||||
|
# NOTE(review): this unit is described above as the headless agent, but the
# ExecStart below launches the gateway (identical to hermes-gateway.service) —
# confirm the intended headless-agent command. --replace clears stale PID files.
|
||||||
|
ExecStart=/home/hermes/.local/bin/hermes gateway start
|
||||||
|
|
||||||
|
# Graceful stop: send SIGTERM and wait up to 30 s before SIGKILL.
|
||||||
|
ExecStop=/bin/kill -TERM $MAINPID
|
||||||
|
TimeoutStopSec=30
|
||||||
|
|
||||||
|
# Restart automatically on failure, retrying every 5 s (RestartSec).
|
||||||
|
Restart=on-failure
|
||||||
|
RestartSec=5s
|
||||||
|
StartLimitBurst=5
|
||||||
|
StartLimitIntervalSec=60s
|
||||||
|
|
||||||
|
# Security hardening — tighten as appropriate for your deployment.
|
||||||
|
NoNewPrivileges=true
|
||||||
|
PrivateTmp=true
|
||||||
|
ProtectSystem=strict
|
||||||
|
ProtectHome=read-only
|
||||||
|
ReadWritePaths=/home/hermes/.hermes /home/hermes/.local/share/hermes
|
||||||
|
|
||||||
|
# Logging — output goes to journald; read with: journalctl -u hermes-agent -f
|
||||||
|
StandardOutput=journal
|
||||||
|
StandardError=journal
|
||||||
|
SyslogIdentifier=hermes-agent
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
59
deploy/hermes-gateway.service
Normal file
59
deploy/hermes-gateway.service
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
# systemd unit — Hermes Gateway (messaging platform adapter)
|
||||||
|
#
|
||||||
|
# Install:
|
||||||
|
# sudo cp hermes-gateway.service /etc/systemd/system/
|
||||||
|
# sudo systemctl daemon-reload
|
||||||
|
# sudo systemctl enable --now hermes-gateway
|
||||||
|
#
|
||||||
|
# The gateway connects Hermes to Telegram, Discord, Slack, WhatsApp, Signal,
|
||||||
|
# and other platforms. It is a long-running asyncio process that bridges
|
||||||
|
# inbound messages to the agent and routes responses back.
|
||||||
|
#
|
||||||
|
# See DEPLOY.md for environment variable configuration.
|
||||||
|
|
||||||
|
[Unit]
|
||||||
|
Description=Hermes Gateway (messaging platform bridge)
|
||||||
|
Documentation=https://hermes-agent.nousresearch.com/docs/user-guide/messaging
|
||||||
|
After=network-online.target
|
||||||
|
Wants=network-online.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
User=hermes
|
||||||
|
Group=hermes
|
||||||
|
|
||||||
|
WorkingDirectory=/home/hermes
|
||||||
|
|
||||||
|
# Load environment (API keys, platform tokens, etc.) from the data directory.
|
||||||
|
EnvironmentFile=/home/hermes/.hermes/.env
|
||||||
|
|
||||||
|
# --replace clears stale PID/lock files from an unclean previous shutdown.
|
||||||
|
ExecStart=/home/hermes/.local/bin/hermes gateway start --replace
|
||||||
|
|
||||||
|
# Pre-start hook: write a timestamped marker so rollback can diff against it.
|
||||||
|
ExecStartPre=/bin/sh -c 'echo "$(date -u +%%Y-%%m-%%dT%%H:%%M:%%SZ) gateway starting" >> /home/hermes/.hermes/logs/deploy.log'
|
||||||
|
|
||||||
|
# Post-stop hook: log shutdown time for audit trail.
|
||||||
|
ExecStopPost=/bin/sh -c 'echo "$(date -u +%%Y-%%m-%%dT%%H:%%M:%%SZ) gateway stopped" >> /home/hermes/.hermes/logs/deploy.log'
|
||||||
|
|
||||||
|
ExecStop=/bin/kill -TERM $MAINPID
|
||||||
|
TimeoutStopSec=30
|
||||||
|
|
||||||
|
Restart=on-failure
|
||||||
|
RestartSec=5s
|
||||||
|
StartLimitBurst=5
|
||||||
|
StartLimitIntervalSec=60s
|
||||||
|
|
||||||
|
# Security hardening.
|
||||||
|
NoNewPrivileges=true
|
||||||
|
PrivateTmp=true
|
||||||
|
ProtectSystem=strict
|
||||||
|
ProtectHome=read-only
|
||||||
|
ReadWritePaths=/home/hermes/.hermes /home/hermes/.local/share/hermes
|
||||||
|
|
||||||
|
StandardOutput=journal
|
||||||
|
StandardError=journal
|
||||||
|
SyslogIdentifier=hermes-gateway
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
678
docs/jupyter-as-execution-layer-research.md
Normal file
678
docs/jupyter-as-execution-layer-research.md
Normal file
@@ -0,0 +1,678 @@
|
|||||||
|
# Jupyter Notebooks as Core LLM Execution Layer — Deep Research Report
|
||||||
|
|
||||||
|
**Issue:** #155
|
||||||
|
**Date:** 2026-04-06
|
||||||
|
**Status:** Research / Spike
|
||||||
|
**Prior Art:** Timmy's initial spike (llm_execution_spike.ipynb, hamelnb bridge, JupyterLab on forge VPS)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Executive Summary
|
||||||
|
|
||||||
|
This report deepens the research from issue #155 into three areas requested by Rockachopa:
|
||||||
|
1. The **full Jupyter product suite** — JupyterHub vs JupyterLab vs Notebook
|
||||||
|
2. **Papermill** — the production-grade notebook execution engine already used in real data pipelines
|
||||||
|
3. The **"PR model for notebooks"** — how agents can propose, diff, review, and merge changes to `.ipynb` files similarly to code PRs
|
||||||
|
|
||||||
|
The conclusion: an elegant, production-grade agent→notebook pipeline already exists as open-source tooling. We don't need to invent much — we need to compose what's there.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. The Jupyter Product Suite
|
||||||
|
|
||||||
|
The Jupyter ecosystem has three distinct layers that are often conflated. Understanding the distinction is critical for architectural decisions.
|
||||||
|
|
||||||
|
### 1.1 Jupyter Notebook (Classic)
|
||||||
|
|
||||||
|
The original single-user interface. One browser tab = one `.ipynb` file. Version 6 is in maintenance-only mode. Version 7 was rebuilt on JupyterLab components and is functionally equivalent. For headless agent use, the UI is irrelevant — what matters is the `.ipynb` file format and the kernel execution model underneath.
|
||||||
|
|
||||||
|
### 1.2 JupyterLab
|
||||||
|
|
||||||
|
The current canonical Jupyter interface for human users: full IDE, multi-pane, terminal, extension manager, built-in diff viewer, and `jupyterlab-git` for Git workflows from the UI. JupyterLab is the recommended target for agent-collaborative workflows because:
|
||||||
|
|
||||||
|
- It exposes the same REST API as classic Jupyter (kernel sessions, execute, contents)
|
||||||
|
- Extensions like `jupyterlab-git` let a human co-reviewer inspect changes alongside the agent
|
||||||
|
- The `hamelnb` bridge Timmy already validated works against a JupyterLab server
|
||||||
|
|
||||||
|
**For agents:** JupyterLab is the platform to run on. The agent doesn't interact with the UI — it uses the Jupyter REST API or Papermill on top of it.
|
||||||
|
|
||||||
|
### 1.3 JupyterHub — The Multi-User Orchestration Layer
|
||||||
|
|
||||||
|
JupyterHub is not a UI. It is a **multi-user server** that spawns, manages, and proxies individual single-user Jupyter servers. This is the production infrastructure layer.
|
||||||
|
|
||||||
|
```
|
||||||
|
[Agent / Browser / API Client]
|
||||||
|
|
|
||||||
|
[Proxy] (configurable-http-proxy)
|
||||||
|
/ \
|
||||||
|
[Hub] [Single-User Jupyter Server per user/agent]
|
||||||
|
(Auth, (standard JupyterLab/Notebook server)
|
||||||
|
Spawner,
|
||||||
|
REST API)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Key components:**
|
||||||
|
- **Hub:** Manages auth, user database, spawner lifecycle, REST API
|
||||||
|
- **Proxy:** Routes `/hub/*` to Hub, `/user/<name>/*` to that user's server
|
||||||
|
- **Spawner:** How single-user servers are started. Default = local process. Production options include `KubeSpawner` (Kubernetes pod per user) and `DockerSpawner` (container per user)
|
||||||
|
- **Authenticator:** PAM, OAuth, DummyAuthenticator (for isolated agent environments)
|
||||||
|
|
||||||
|
**JupyterHub REST API** (relevant for agent orchestration):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Spawn a named server for an agent service account
|
||||||
|
POST /hub/api/users/<username>/servers/<name>
|
||||||
|
|
||||||
|
# Stop it when done
|
||||||
|
DELETE /hub/api/users/<username>/servers/<name>
|
||||||
|
|
||||||
|
# Create a scoped API token for the agent
|
||||||
|
POST /hub/api/users/<username>/tokens
|
||||||
|
|
||||||
|
# Check server status
|
||||||
|
GET /hub/api/users/<username>
|
||||||
|
```
|
||||||
|
|
||||||
|
**Why this matters for Hermes:** JupyterHub gives us isolated kernel environments per agent task, programmable lifecycle management, and a clean auth model. Instead of running one shared JupyterLab instance on the forge VPS, we could spawn ephemeral single-user servers per notebook execution run — each with its own kernel, clean state, and resource limits.
|
||||||
|
|
||||||
|
### 1.4 Jupyter Kernel Gateway — Minimal Headless Execution
|
||||||
|
|
||||||
|
If JupyterHub is too heavy, `jupyter-kernel-gateway` exposes just the kernel protocol over REST + WebSocket:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install jupyter-kernel-gateway
|
||||||
|
jupyter kernelgateway --KernelGatewayApp.api=kernel_gateway.jupyter_websocket
|
||||||
|
|
||||||
|
# Start kernel
|
||||||
|
POST /api/kernels
|
||||||
|
# Execute via WebSocket on Jupyter messaging protocol
|
||||||
|
WS /api/kernels/<kernel_id>/channels
|
||||||
|
# Stop kernel
|
||||||
|
DELETE /api/kernels/<kernel_id>
|
||||||
|
```
|
||||||
|
|
||||||
|
This is the lowest-level option: no notebook management, just raw kernel access. Suitable if we want to build our own execution layer from scratch.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Papermill — Production Notebook Execution
|
||||||
|
|
||||||
|
Papermill is the missing link between "notebook as experiment" and "notebook as repeatable pipeline task." It is already used at scale in industry data pipelines (Netflix, Airbnb, etc.).
|
||||||
|
|
||||||
|
### 2.1 Core Concept: Parameterization
|
||||||
|
|
||||||
|
Papermill's key innovation is **parameter injection**. Tag a cell in the notebook with `"parameters"`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Cell tagged "parameters" (defaults — defined by notebook author)
|
||||||
|
alpha = 0.5
|
||||||
|
batch_size = 32
|
||||||
|
model_name = "baseline"
|
||||||
|
```
|
||||||
|
|
||||||
|
At runtime, Papermill inserts a new cell immediately after, tagged `"injected-parameters"`, that overrides the defaults:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Cell tagged "injected-parameters" (injected by Papermill at runtime)
|
||||||
|
alpha = 0.01
|
||||||
|
batch_size = 128
|
||||||
|
model_name = "experiment_007"
|
||||||
|
```
|
||||||
|
|
||||||
|
Because Python executes top-to-bottom, the injected cell shadows the defaults. The original notebook is never mutated — Papermill reads input, writes to a new output file.
|
||||||
|
|
||||||
|
### 2.2 Python API
|
||||||
|
|
||||||
|
```python
|
||||||
|
import papermill as pm
|
||||||
|
|
||||||
|
nb = pm.execute_notebook(
|
||||||
|
input_path="analysis.ipynb", # source (can be s3://, az://, gs://)
|
||||||
|
output_path="output/run_001.ipynb", # destination (persists outputs)
|
||||||
|
parameters={
|
||||||
|
"alpha": 0.01,
|
||||||
|
"n_samples": 1000,
|
||||||
|
"run_id": "fleet-check-2026-04-06",
|
||||||
|
},
|
||||||
|
kernel_name="python3",
|
||||||
|
execution_timeout=300, # per-cell timeout in seconds
|
||||||
|
log_output=True, # stream cell output to logger
|
||||||
|
cwd="/path/to/notebook/", # working directory
|
||||||
|
)
|
||||||
|
# Returns: NotebookNode (the fully executed notebook with all outputs)
|
||||||
|
```
|
||||||
|
|
||||||
|
On cell failure, Papermill raises `PapermillExecutionError` with:
|
||||||
|
- `cell_index` — which cell failed
|
||||||
|
- `source` — the failing cell's code
|
||||||
|
- `ename` / `evalue` — exception type and message
|
||||||
|
- `traceback` — full traceback
|
||||||
|
|
||||||
|
Even on failure, the output notebook is written with whatever cells completed — enabling partial-run inspection.
|
||||||
|
|
||||||
|
### 2.3 CLI
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Basic execution
|
||||||
|
papermill analysis.ipynb output/run_001.ipynb \
|
||||||
|
-p alpha 0.01 \
|
||||||
|
-p n_samples 1000
|
||||||
|
|
||||||
|
# From YAML parameter file
|
||||||
|
papermill analysis.ipynb output/run_001.ipynb -f params.yaml
|
||||||
|
|
||||||
|
# CI-friendly: log outputs, no progress bar
|
||||||
|
papermill analysis.ipynb output/run_001.ipynb \
|
||||||
|
--log-output \
|
||||||
|
--no-progress-bar \
|
||||||
|
--execution-timeout 300 \
|
||||||
|
-p run_id "fleet-check-2026-04-06"
|
||||||
|
|
||||||
|
# Prepare only (inject params, skip execution — for preview/inspection)
|
||||||
|
papermill analysis.ipynb preview.ipynb --prepare-only -p alpha 0.01
|
||||||
|
|
||||||
|
# Inspect parameter schema
|
||||||
|
papermill --help-notebook analysis.ipynb
|
||||||
|
```
|
||||||
|
|
||||||
|
**Remote storage** is built in — `pip install papermill[s3]` enables `s3://` paths for both input and output. Azure and GCS are also supported. For Hermes, this means notebook runs can be stored in object storage and retrieved later for audit.
|
||||||
|
|
||||||
|
### 2.4 Scrapbook — Structured Output Collection
|
||||||
|
|
||||||
|
`scrapbook` is Papermill's companion for extracting structured data from executed notebooks. Inside a notebook cell:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import scrapbook as sb
|
||||||
|
|
||||||
|
# Write typed outputs (stored as special display_data in cell outputs)
|
||||||
|
sb.glue("accuracy", 0.9342)
|
||||||
|
sb.glue("metrics", {"precision": 0.91, "recall": 0.93, "f1": 0.92})
|
||||||
|
sb.glue("results_df", df, "pandas") # DataFrames too
|
||||||
|
```
|
||||||
|
|
||||||
|
After execution, from the agent:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import scrapbook as sb
|
||||||
|
|
||||||
|
nb = sb.read_notebook("output/fleet-check-2026-04-06.ipynb")
|
||||||
|
metrics = nb.scraps["metrics"].data # -> {"precision": 0.91, ...}
|
||||||
|
accuracy = nb.scraps["accuracy"].data # -> 0.9342
|
||||||
|
|
||||||
|
# Or aggregate across many runs
|
||||||
|
book = sb.read_notebooks("output/")
|
||||||
|
book.scrap_dataframe # -> pd.DataFrame with all scraps + filenames
|
||||||
|
```
|
||||||
|
|
||||||
|
This is the clean interface between notebook execution and agent decision-making: the notebook outputs its findings as named, typed scraps; the agent reads them programmatically and acts.
|
||||||
|
|
||||||
|
### 2.5 How Papermill Compares to hamelnb
|
||||||
|
|
||||||
|
| Capability | hamelnb | Papermill |
|
||||||
|
|---|---|---|
|
||||||
|
| Stateful kernel session | Yes | No (fresh kernel per run) |
|
||||||
|
| Parameter injection | No | Yes |
|
||||||
|
| Persistent output notebook | No | Yes |
|
||||||
|
| Remote storage (S3/Azure) | No | Yes |
|
||||||
|
| Per-cell timing/metadata | No | Yes (in output nb metadata) |
|
||||||
|
| Error isolation (partial runs) | No | Yes |
|
||||||
|
| Production pipeline use | Experimental | Industry-standard |
|
||||||
|
| Structured output collection | No | Yes (via scrapbook) |
|
||||||
|
|
||||||
|
**Verdict:** `hamelnb` is great for interactive REPL-style exploration (where state accumulates). Papermill is better for task execution (where we want reproducible, parameterized, auditable runs). They serve different use cases. Hermes needs both.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. The `.ipynb` File Format — What the Agent Is Actually Working With
|
||||||
|
|
||||||
|
Understanding the format is essential for the "PR model." A `.ipynb` file is JSON with this structure:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5,
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
|
||||||
|
"language_info": {"name": "python", "version": "3.10.0"}
|
||||||
|
},
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"id": "a1b2c3d4",
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": "# Fleet Health Check\n\nThis notebook checks system health.",
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "e5f6g7h8",
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": "alpha = 0.5\nthreshold = 0.95",
|
||||||
|
"metadata": {"tags": ["parameters"]},
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "i9j0k1l2",
|
||||||
|
"cell_type": "code",
|
||||||
|
"source": "import sys\nprint(sys.version)",
|
||||||
|
"metadata": {},
|
||||||
|
"execution_count": 1,
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"output_type": "stream",
|
||||||
|
"name": "stdout",
|
||||||
|
"text": "3.10.0 (default, ...)\n"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The `nbformat` Python library provides a clean API for working with this:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import nbformat
|
||||||
|
|
||||||
|
# Read
|
||||||
|
with open("notebook.ipynb") as f:
|
||||||
|
nb = nbformat.read(f, as_version=4)
|
||||||
|
|
||||||
|
# Navigate
|
||||||
|
for cell in nb.cells:
|
||||||
|
if cell.cell_type == "code":
|
||||||
|
print(cell.source)
|
||||||
|
|
||||||
|
# Modify
|
||||||
|
nb.cells[2].source = "import sys\nprint('updated')"
|
||||||
|
|
||||||
|
# Add cells
|
||||||
|
new_md = nbformat.v4.new_markdown_cell("## Agent Analysis\nInserted by Hermes.")
|
||||||
|
nb.cells.insert(3, new_md)
|
||||||
|
|
||||||
|
# Write
|
||||||
|
with open("modified.ipynb", "w") as f:
|
||||||
|
nbformat.write(nb, f)
|
||||||
|
|
||||||
|
# Validate
|
||||||
|
nbformat.validate(nb) # raises nbformat.ValidationError on invalid format
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. The PR Model for Notebooks
|
||||||
|
|
||||||
|
This is the elegant architecture Rockachopa described: agents making PRs to notebooks the same way they make PRs to code. Here's how the full stack enables it.
|
||||||
|
|
||||||
|
### 4.1 The Problem: Raw `.ipynb` Diffs Are Unusable
|
||||||
|
|
||||||
|
Without tooling, a `git diff` on a notebook that was merely re-run (no source changes) produces thousands of lines of JSON changes — execution counts, timestamps, base64-encoded plot images. Code review on raw `.ipynb` diffs is impractical.
|
||||||
|
|
||||||
|
### 4.2 nbstripout — Clean Git History
|
||||||
|
|
||||||
|
`nbstripout` installs a git **clean filter** that strips outputs before files enter the git index. The working copy is untouched; only what gets committed is clean.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install nbstripout
|
||||||
|
nbstripout --install # per-repo
|
||||||
|
# or
|
||||||
|
nbstripout --install --global # all repos
|
||||||
|
```
|
||||||
|
|
||||||
|
This writes to `.git/config`:
|
||||||
|
```ini
|
||||||
|
[filter "nbstripout"]
|
||||||
|
clean = nbstripout
|
||||||
|
smudge = cat
|
||||||
|
required = true
|
||||||
|
|
||||||
|
[diff "ipynb"]
|
||||||
|
textconv = nbstripout -t
|
||||||
|
```
|
||||||
|
|
||||||
|
And to `.gitattributes`:
|
||||||
|
```
|
||||||
|
*.ipynb filter=nbstripout
|
||||||
|
*.ipynb diff=ipynb
|
||||||
|
```
|
||||||
|
|
||||||
|
Now `git diff` shows only source changes — same as reviewing a `.py` file.
|
||||||
|
|
||||||
|
**For executed-output notebooks** (where we want to keep outputs for audit): use a separate path like `runs/` or `outputs/` excluded from the filter via `.gitattributes`:
|
||||||
|
```
|
||||||
|
*.ipynb filter=nbstripout
|
||||||
|
runs/*.ipynb !filter
|
||||||
|
runs/*.ipynb !diff
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.3 nbdime — Semantic Diff and Merge
|
||||||
|
|
||||||
|
nbdime understands notebook structure. Instead of diffing raw JSON, it diffs at the level of cells — knowing that `cells` is a list, `source` is a string, and outputs should often be ignored.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install nbdime
|
||||||
|
|
||||||
|
# Enable semantic git diff/merge for all .ipynb files
|
||||||
|
nbdime config-git --enable
|
||||||
|
|
||||||
|
# Now standard git commands are notebook-aware:
|
||||||
|
git diff HEAD notebook.ipynb # semantic cell-level diff
|
||||||
|
git merge feature-branch # uses nbdime for .ipynb conflict resolution
|
||||||
|
git log -p notebook.ipynb # readable patch per commit
|
||||||
|
```
|
||||||
|
|
||||||
|
**Python API for agent reasoning:**
|
||||||
|
|
||||||
|
```python
|
||||||
|
import nbdime
|
||||||
|
import nbformat
|
||||||
|
|
||||||
|
nb_base = nbformat.read("original.ipynb", as_version=4)
nb_pr = nbformat.read("proposed.ipynb", as_version=4)
|
||||||
|
|
||||||
|
diff = nbdime.diff_notebooks(nb_base, nb_pr)
|
||||||
|
|
||||||
|
# diff is a list of structured ops the agent can reason about:
|
||||||
|
# [{"op": "patch", "key": "cells", "diff": [
|
||||||
|
# {"op": "patch", "key": 3, "diff": [
|
||||||
|
# {"op": "patch", "key": "source", "diff": [...string ops...]}
|
||||||
|
# ]}
|
||||||
|
# ]}]
|
||||||
|
|
||||||
|
# Apply a diff (patch)
|
||||||
|
from nbdime.patching import patch
|
||||||
|
nb_result = patch(nb_base, diff)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.4 The Full Agent PR Workflow
|
||||||
|
|
||||||
|
Here is the complete workflow — analogous to how Hermes makes PRs to code repos via Gitea:
|
||||||
|
|
||||||
|
**1. Agent reads the task notebook**
|
||||||
|
```python
|
||||||
|
nb = nbformat.read("fleet_health_check.ipynb", as_version=4)
|
||||||
|
```
|
||||||
|
|
||||||
|
**2. Agent locates and modifies relevant cells**
|
||||||
|
```python
|
||||||
|
# Find parameter cell
|
||||||
|
params_cell = next(
|
||||||
|
c for c in nb.cells
|
||||||
|
if "parameters" in c.get("metadata", {}).get("tags", [])
|
||||||
|
)
|
||||||
|
# Update threshold
|
||||||
|
params_cell.source = params_cell.source.replace("threshold = 0.95", "threshold = 0.90")
|
||||||
|
|
||||||
|
# Add explanatory markdown
|
||||||
|
nb.cells.insert(
|
||||||
|
nb.cells.index(params_cell) + 1,
|
||||||
|
nbformat.v4.new_markdown_cell(
|
||||||
|
"**Note (Hermes 2026-04-06):** Threshold lowered from 0.95 to 0.90 "
|
||||||
|
"based on false-positive analysis from last 7 days of runs."
|
||||||
|
)
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**3. Agent writes and commits to a branch**
|
||||||
|
```bash
|
||||||
|
git checkout -b agent/fleet-health-threshold-update
|
||||||
|
# (the modified notebook was already written from Python via nbformat.write)
|
||||||
|
git add fleet_health_check.ipynb
|
||||||
|
git commit -m "feat(notebooks): lower fleet health threshold to 0.90 (#155)"
|
||||||
|
```
|
||||||
|
|
||||||
|
**4. Agent executes the proposed notebook to validate**
|
||||||
|
```python
|
||||||
|
import papermill as pm
|
||||||
|
|
||||||
|
pm.execute_notebook(
|
||||||
|
"fleet_health_check.ipynb",
|
||||||
|
"output/validation_run.ipynb",
|
||||||
|
parameters={"run_id": "agent-validation-2026-04-06"},
|
||||||
|
log_output=True,
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**5. Agent collects results and compares**
|
||||||
|
```python
|
||||||
|
import scrapbook as sb
|
||||||
|
|
||||||
|
result = sb.read_notebook("output/validation_run.ipynb")
|
||||||
|
health_score = result.scraps["health_score"].data
|
||||||
|
alert_count = result.scraps["alert_count"].data
|
||||||
|
```
|
||||||
|
|
||||||
|
**6. Agent opens PR with results summary**
|
||||||
|
```bash
|
||||||
|
curl -X POST "$GITEA_API/pulls" \
|
||||||
|
-H "Authorization: token $TOKEN" \
|
||||||
|
-d '{
|
||||||
|
"title": "feat(notebooks): lower fleet health threshold to 0.90",
|
||||||
|
"body": "## Agent Analysis\n\n- Health score: 0.94 (was 0.89 with old threshold)\n- Alert count: 12 (was 47 false positives)\n- Validation run: output/validation_run.ipynb\n\nRefs #155",
|
||||||
|
"head": "agent/fleet-health-threshold-update",
|
||||||
|
"base": "main"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**7. Human reviews the PR using nbdime diff**
|
||||||
|
|
||||||
|
The PR diff in Gitea shows the clean cell-level source changes (thanks to nbstripout). The human can also run `nbdiff-web original.ipynb proposed.ipynb` locally for rich rendered diff with output comparison.
|
||||||
|
|
||||||
|
### 4.5 nbval — Regression Testing Notebooks
|
||||||
|
|
||||||
|
`nbval` treats each notebook cell as a pytest test case, re-executing and comparing outputs to stored values:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install nbval
|
||||||
|
|
||||||
|
# Strict: every cell output must match stored outputs
|
||||||
|
pytest --nbval fleet_health_check.ipynb
|
||||||
|
|
||||||
|
# Lax: only check cells marked with # NBVAL_CHECK_OUTPUT
|
||||||
|
pytest --nbval-lax fleet_health_check.ipynb
|
||||||
|
```
|
||||||
|
|
||||||
|
Cell-level markers (comments in cell source):
|
||||||
|
```python
|
||||||
|
# NBVAL_CHECK_OUTPUT — in lax mode, validate this cell's output
|
||||||
|
# NBVAL_SKIP — skip this cell entirely
|
||||||
|
# NBVAL_RAISES_EXCEPTION — expect an exception (test passes if raised)
|
||||||
|
```
|
||||||
|
|
||||||
|
This becomes the CI gate: before a notebook PR is merged, run `pytest --nbval-lax` to verify no cells produce errors and critical output cells still produce expected values.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. Gaps and Recommendations
|
||||||
|
|
||||||
|
### 5.1 Gap Assessment (Refining Timmy's Original Findings)
|
||||||
|
|
||||||
|
| Gap | Severity | Solution |
|
||||||
|
|---|---|---|
|
||||||
|
| No Hermes tool access in kernel | High | Inject `hermes_runtime` module (see §5.2) |
|
||||||
|
| No structured output protocol | High | Use scrapbook `sb.glue()` pattern |
|
||||||
|
| No parameterization | Medium | Add Papermill `"parameters"` cell to notebooks |
|
||||||
|
| XSRF/auth friction | Medium | Disable for local; use JupyterHub token scopes for multi-user |
|
||||||
|
| No notebook CI/testing | Medium | Add nbval to test suite |
|
||||||
|
| Raw `.ipynb` diffs in PRs | Medium | Install nbstripout + nbdime |
|
||||||
|
| No scheduling | Low | Papermill + existing Hermes cron layer |
|
||||||
|
|
||||||
|
### 5.2 Short-Term Recommendations (This Month)
|
||||||
|
|
||||||
|
**1. `NotebookExecutor` tool**
|
||||||
|
|
||||||
|
A thin Hermes tool wrapping the ecosystem:
|
||||||
|
|
||||||
|
```python
|
||||||
|
class NotebookExecutor:
|
||||||
|
def execute(self, input_path, output_path, parameters, timeout=300):
|
||||||
|
"""Wraps pm.execute_notebook(). Returns structured result dict."""
|
||||||
|
|
||||||
|
def collect_outputs(self, notebook_path):
|
||||||
|
"""Wraps sb.read_notebook(). Returns dict of named scraps."""
|
||||||
|
|
||||||
|
def inspect_parameters(self, notebook_path):
|
||||||
|
"""Wraps pm.inspect_notebook(). Returns parameter schema."""
|
||||||
|
|
||||||
|
def read_notebook(self, path):
|
||||||
|
"""Returns nbformat NotebookNode for cell inspection/modification."""
|
||||||
|
|
||||||
|
def write_notebook(self, nb, path):
|
||||||
|
"""Writes modified NotebookNode back to disk."""
|
||||||
|
|
||||||
|
def diff_notebooks(self, path_a, path_b):
|
||||||
|
"""Returns structured nbdime diff for agent reasoning."""
|
||||||
|
|
||||||
|
def validate(self, notebook_path):
|
||||||
|
"""Runs nbformat.validate() + optional pytest --nbval-lax."""
|
||||||
|
```
|
||||||
|
|
||||||
|
Execution result structure for the agent:
|
||||||
|
```python
|
||||||
|
{
|
||||||
|
"status": "success" | "error",
|
||||||
|
"duration_seconds": 12.34,
|
||||||
|
"cells_executed": 15,
|
||||||
|
"failed_cell": { # None on success
|
||||||
|
"index": 7,
|
||||||
|
"source": "model.fit(X, y)",
|
||||||
|
"ename": "ValueError",
|
||||||
|
"evalue": "Input contains NaN",
|
||||||
|
},
|
||||||
|
"scraps": { # from scrapbook
|
||||||
|
"health_score": 0.94,
|
||||||
|
"alert_count": 12,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**2. Fleet Health Check as a Notebook**
|
||||||
|
|
||||||
|
Convert the fleet health check epic into a parameterized notebook with:
|
||||||
|
- `"parameters"` cell for run configuration (date range, thresholds, agent ID)
|
||||||
|
- Markdown cells narrating each step
|
||||||
|
- `sb.glue()` calls for structured outputs
|
||||||
|
- `# NBVAL_CHECK_OUTPUT` markers on critical cells
|
||||||
|
|
||||||
|
**3. Git hygiene for notebooks**
|
||||||
|
|
||||||
|
Install nbstripout + nbdime in the hermes-agent repo:
|
||||||
|
```bash
|
||||||
|
pip install nbstripout nbdime
|
||||||
|
nbstripout --install
|
||||||
|
nbdime config-git --enable
|
||||||
|
```
|
||||||
|
|
||||||
|
Add to `.gitattributes`:
|
||||||
|
```
|
||||||
|
*.ipynb filter=nbstripout
|
||||||
|
*.ipynb diff=ipynb
|
||||||
|
runs/*.ipynb !filter
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.3 Medium-Term Recommendations (Next Quarter)
|
||||||
|
|
||||||
|
**4. `hermes_runtime` Python module**
|
||||||
|
|
||||||
|
Inject Hermes tool access into the kernel via a module that notebooks import:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# In kernel cell: from hermes_runtime import terminal, read_file, web_search
|
||||||
|
import hermes_runtime as hermes
|
||||||
|
|
||||||
|
results = hermes.web_search("fleet health metrics best practices")
|
||||||
|
hermes.terminal("systemctl status agent-fleet")
|
||||||
|
content = hermes.read_file("/var/log/hermes/agent.log")
|
||||||
|
```
|
||||||
|
|
||||||
|
This closes the most significant gap: notebooks gain the same tool access as skills, while retaining state persistence and narrative structure.
|
||||||
|
|
||||||
|
**5. Notebook-triggered cron**
|
||||||
|
|
||||||
|
Extend the Hermes cron layer to accept `.ipynb` paths as targets:
|
||||||
|
```yaml
|
||||||
|
# cron entry
|
||||||
|
schedule: "0 6 * * *"
|
||||||
|
type: notebook
|
||||||
|
path: notebooks/fleet_health_check.ipynb
|
||||||
|
parameters:
|
||||||
|
run_id: "{{date}}"
|
||||||
|
alert_threshold: 0.90
|
||||||
|
output_path: runs/fleet_health_{{date}}.ipynb
|
||||||
|
```
|
||||||
|
|
||||||
|
The cron runner calls `pm.execute_notebook()` and commits the output to the repo.
|
||||||
|
|
||||||
|
**6. JupyterHub for multi-agent isolation**
|
||||||
|
|
||||||
|
If multiple agents need concurrent notebook execution, deploy JupyterHub with `DockerSpawner` or `KubeSpawner`. Each agent job gets an isolated container with its own kernel, no state bleed between runs.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. Architecture Vision
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────────┐
|
||||||
|
│ Hermes Agent │
|
||||||
|
│ │
|
||||||
|
│ Skills (one-shot) Notebooks (multi-step) │
|
||||||
|
│ ┌─────────────────┐ ┌─────────────────────────────────┐ │
|
||||||
|
│ │ terminal() │ │ .ipynb file │ │
|
||||||
|
│ │ web_search() │ │ ├── Markdown (narrative) │ │
|
||||||
|
│ │ read_file() │ │ ├── Code cells (logic) │ │
|
||||||
|
│ └─────────────────┘ │ ├── "parameters" cell │ │
|
||||||
|
│ │ └── sb.glue() outputs │ │
|
||||||
|
│ └──────────────┬────────────────┘ │
|
||||||
|
│ │ │
|
||||||
|
│ ┌──────────────▼────────────────┐ │
|
||||||
|
│ │ NotebookExecutor tool │ │
|
||||||
|
│ │ (papermill + scrapbook + │ │
|
||||||
|
│ │ nbformat + nbdime + nbval) │ │
|
||||||
|
│ └──────────────┬────────────────┘ │
|
||||||
|
│ │ │
|
||||||
|
└────────────────────────────────────────────┼────────────────────┘
|
||||||
|
│
|
||||||
|
┌───────────────────▼──────────────────┐
|
||||||
|
│ JupyterLab / Hub │
|
||||||
|
│ (kernel execution environment) │
|
||||||
|
└───────────────────┬──────────────────┘
|
||||||
|
│
|
||||||
|
┌───────────────────▼──────────────────┐
|
||||||
|
│ Git + Gitea │
|
||||||
|
│ (nbstripout clean diffs, │
|
||||||
|
│ nbdime semantic review, │
|
||||||
|
│ PR workflow for notebook changes) │
|
||||||
|
└──────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
**Notebooks become the primary artifact of complex tasks:** the agent generates or edits cells, Papermill executes them reproducibly, scrapbook extracts structured outputs for agent decision-making, and the resulting `.ipynb` is both proof-of-work and human-readable report. Skills remain for one-shot actions. Notebooks own multi-step workflows.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. Package Summary
|
||||||
|
|
||||||
|
| Package | Purpose | Install |
|
||||||
|
|---|---|---|
|
||||||
|
| `nbformat` | Read/write/validate `.ipynb` files | `pip install nbformat` |
|
||||||
|
| `nbconvert` | Execute and export notebooks | `pip install nbconvert` |
|
||||||
|
| `papermill` | Parameterize + execute in pipelines | `pip install papermill` |
|
||||||
|
| `scrapbook` | Structured output collection | `pip install scrapbook` |
|
||||||
|
| `nbdime` | Semantic diff/merge for git | `pip install nbdime` |
|
||||||
|
| `nbstripout` | Git filter for clean diffs | `pip install nbstripout` |
|
||||||
|
| `nbval` | pytest-based output regression | `pip install nbval` |
|
||||||
|
| `jupyter-kernel-gateway` | Headless REST kernel access | `pip install jupyter-kernel-gateway` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. References
|
||||||
|
|
||||||
|
- [Papermill GitHub (nteract/papermill)](https://github.com/nteract/papermill)
|
||||||
|
- [Scrapbook GitHub (nteract/scrapbook)](https://github.com/nteract/scrapbook)
|
||||||
|
- [nbformat format specification](https://nbformat.readthedocs.io/en/latest/format_description.html)
|
||||||
|
- [nbdime documentation](https://nbdime.readthedocs.io/)
|
||||||
|
- [nbdime diff format spec (JEP #8)](https://github.com/jupyter/enhancement-proposals/blob/master/08-notebook-diff/notebook-diff.md)
|
||||||
|
- [nbconvert execute API](https://nbconvert.readthedocs.io/en/latest/execute_api.html)
|
||||||
|
- [nbstripout README](https://github.com/kynan/nbstripout)
|
||||||
|
- [nbval GitHub (computationalmodelling/nbval)](https://github.com/computationalmodelling/nbval)
|
||||||
|
- [JupyterHub REST API](https://jupyterhub.readthedocs.io/en/stable/howto/rest.html)
|
||||||
|
- [JupyterHub Technical Overview](https://jupyterhub.readthedocs.io/en/latest/reference/technical-overview.html)
|
||||||
|
- [Jupyter Kernel Gateway](https://github.com/jupyter-server/kernel_gateway)
|
||||||
@@ -443,6 +443,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||||||
self._runner: Optional["web.AppRunner"] = None
|
self._runner: Optional["web.AppRunner"] = None
|
||||||
self._site: Optional["web.TCPSite"] = None
|
self._site: Optional["web.TCPSite"] = None
|
||||||
self._response_store = ResponseStore()
|
self._response_store = ResponseStore()
|
||||||
|
self._start_time: float = time.time()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _parse_cors_origins(value: Any) -> tuple[str, ...]:
|
def _parse_cors_origins(value: Any) -> tuple[str, ...]:
|
||||||
@@ -582,8 +583,53 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
async def _handle_health(self, request: "web.Request") -> "web.Response":
|
async def _handle_health(self, request: "web.Request") -> "web.Response":
|
||||||
"""GET /health — simple health check."""
|
"""GET /health — liveness probe with gateway runtime state.
|
||||||
return web.json_response({"status": "ok", "platform": "hermes-agent"})
|
|
||||||
|
Returns HTTP 200 with a JSON body while the API server process is alive.
|
||||||
|
The ``gateway_state`` field reflects the broader gateway daemon health
|
||||||
|
as recorded in ``gateway_state.json`` (written by gateway/status.py).
|
||||||
|
Consumers should treat any non-200 response as a failure.
|
||||||
|
|
||||||
|
Response fields:
|
||||||
|
status — always "ok" when the HTTP server is reachable.
|
||||||
|
platform — service name.
|
||||||
|
version — package version (if available).
|
||||||
|
uptime_seconds — seconds since this process started.
|
||||||
|
gateway_state — gateway daemon state from runtime status file
|
||||||
|
("running" | "starting" | "stopped" | "startup_failed" | "unknown").
|
||||||
|
platforms — per-platform adapter states (from runtime status).
|
||||||
|
"""
|
||||||
|
payload: dict = {
|
||||||
|
"status": "ok",
|
||||||
|
"platform": "hermes-agent",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Package version.
|
||||||
|
try:
|
||||||
|
from importlib.metadata import version as pkg_version
|
||||||
|
payload["version"] = pkg_version("hermes-agent")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Process uptime.
|
||||||
|
try:
|
||||||
|
payload["uptime_seconds"] = round(time.time() - self._start_time)
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Gateway runtime state from the status file.
|
||||||
|
try:
|
||||||
|
from gateway.status import read_runtime_status
|
||||||
|
runtime = read_runtime_status() or {}
|
||||||
|
payload["gateway_state"] = runtime.get("gateway_state", "unknown")
|
||||||
|
payload["platforms"] = {
|
||||||
|
name: {"state": pdata.get("state", "unknown")}
|
||||||
|
for name, pdata in runtime.get("platforms", {}).items()
|
||||||
|
}
|
||||||
|
except Exception:
|
||||||
|
payload["gateway_state"] = "unknown"
|
||||||
|
|
||||||
|
return web.json_response(payload)
|
||||||
|
|
||||||
async def _handle_models(self, request: "web.Request") -> "web.Response":
|
async def _handle_models(self, request: "web.Request") -> "web.Response":
|
||||||
"""GET /v1/models — return hermes-agent as an available model."""
|
"""GET /v1/models — return hermes-agent as an available model."""
|
||||||
|
|||||||
955
observatory.py
Normal file
955
observatory.py
Normal file
@@ -0,0 +1,955 @@
|
|||||||
|
"""
|
||||||
|
Observatory — Testbed Health Monitoring & Alerting for Hermes Agent
|
||||||
|
|
||||||
|
Checks running services, system resources, and connectivity.
|
||||||
|
Fires Telegram alerts when thresholds are breached.
|
||||||
|
Posts daily digest reports.
|
||||||
|
Stores 30 days of historical health data in SQLite.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python observatory.py --check # one-shot health check (stdout)
|
||||||
|
python observatory.py --daemon # continuous monitor (60s poll)
|
||||||
|
python observatory.py --digest # print / send daily digest
|
||||||
|
python observatory.py --history N # show last N health records
|
||||||
|
python observatory.py --slo # print SLO report
|
||||||
|
|
||||||
|
Configuration (env vars, falls back to ~/.hermes/.env):
|
||||||
|
OBSERVATORY_ALERT_CHAT_ID Telegram chat ID for alerts
|
||||||
|
OBSERVATORY_DIGEST_CHAT_ID Telegram chat ID for daily digest (default: alert chat)
|
||||||
|
OBSERVATORY_POLL_INTERVAL Seconds between health polls (default: 60)
|
||||||
|
OBSERVATORY_DB_PATH SQLite path (default: ~/.hermes/observatory.db)
|
||||||
|
TELEGRAM_BOT_TOKEN Bot token used to send alerts
|
||||||
|
|
||||||
|
# Threshold overrides (all optional):
|
||||||
|
OBSERVATORY_DISK_WARN_PCT Disk usage warn threshold (default: 80)
|
||||||
|
OBSERVATORY_DISK_CRIT_PCT Disk usage critical threshold (default: 90)
|
||||||
|
OBSERVATORY_MEM_WARN_PCT Memory usage warn threshold (default: 80)
|
||||||
|
OBSERVATORY_MEM_CRIT_PCT Memory usage critical threshold (default: 90)
|
||||||
|
OBSERVATORY_CPU_WARN_PCT CPU usage warn threshold (default: 80)
|
||||||
|
OBSERVATORY_CPU_CRIT_PCT CPU usage critical threshold (default: 95)
|
||||||
|
OBSERVATORY_WEBHOOK_URL Webhook endpoint to probe (default: http://127.0.0.1:8080/health)
|
||||||
|
OBSERVATORY_API_URL API server health URL (default: http://127.0.0.1:8642/health)
|
||||||
|
OBSERVATORY_WEBHOOK_LATENCY_SLO_MS Webhook latency SLO ms (default: 2000)
|
||||||
|
OBSERVATORY_GATEWAY_UPTIME_SLO_PCT Gateway uptime SLO % (default: 99.5)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import signal
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import urllib.request
|
||||||
|
import urllib.error
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from dataclasses import dataclass, field, asdict
|
||||||
|
from datetime import datetime, timezone, timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Optional imports
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# psutil powers the system-resource checks; we degrade gracefully without it.
try:
    import psutil
except ImportError:
    _PSUTIL = False
else:
    _PSUTIL = True

# python-dotenv enables loading config from ~/.hermes/.env; also optional.
try:
    from dotenv import load_dotenv as _load_dotenv
except ImportError:
    _DOTENV = False
else:
    _DOTENV = True

logger = logging.getLogger("observatory")
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Constants & SLO definitions
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Days of historical health records retained in SQLite (per module docstring:
# "Stores 30 days of historical health data").
RETENTION_DAYS = 30

# Service-level objectives reported by the `--slo` command. Unless an entry
# carries "direction": "lower_is_better" (latency targets), higher observed
# values are better (e.g. uptime percentage).
SLO_DEFINITIONS = {
    "gateway_uptime_pct": {
        "description": "Gateway process uptime over the last 24 hours",
        "target": 99.5,
        "unit": "%",
    },
    "webhook_latency_ms": {
        "description": "Webhook endpoint p95 response latency",
        "target": 2000,
        "unit": "ms",
        "direction": "lower_is_better",
    },
    "api_server_latency_ms": {
        "description": "API server /health p95 response latency",
        "target": 2000,
        "unit": "ms",
        "direction": "lower_is_better",
    },
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Configuration
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _load_env() -> None:
    """Load environment variables from .env files when python-dotenv exists.

    Reads ``$HERMES_HOME/.env`` (default ``~/.hermes/.env``) first, then the
    project directory's ``.env`` as a development fallback. Variables already
    present in the process environment are never overridden. No-op when the
    dotenv package is unavailable.
    """
    if not _DOTENV:
        return
    home_dir = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
    candidates = (home_dir / ".env", Path(__file__).parent / ".env")
    for dotenv_file in candidates:
        if dotenv_file.exists():
            _load_dotenv(dotenv_file, override=False)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ObservatoryConfig:
    """Runtime configuration for the observatory.

    Defaults are shown on each field; :meth:`from_env` overrides them from
    environment variables (loading .env files first via _load_env()).
    """

    # Telegram routing: alerts and digests may target different chats.
    alert_chat_id: Optional[str] = None
    digest_chat_id: Optional[str] = None
    telegram_token: Optional[str] = None
    # Seconds between health-check passes in daemon mode.
    poll_interval: int = 60
    # SQLite store location, under $HERMES_HOME (or ~/.hermes) by default.
    db_path: Path = field(default_factory=lambda: Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) / "observatory.db")
    # Resource thresholds (percent) separating ok / warn / critical.
    disk_warn_pct: float = 80.0
    disk_crit_pct: float = 90.0
    mem_warn_pct: float = 80.0
    mem_crit_pct: float = 90.0
    cpu_warn_pct: float = 80.0
    cpu_crit_pct: float = 95.0
    # HTTP health endpoints probed by the webhook/API checks.
    webhook_url: str = "http://127.0.0.1:8080/health"
    api_url: str = "http://127.0.0.1:8642/health"
    # SLO targets used by checks and the digest/SLO reports.
    webhook_latency_slo_ms: float = 2000.0
    gateway_uptime_slo_pct: float = 99.5

    @classmethod
    def from_env(cls) -> "ObservatoryConfig":
        """Build a config from environment variables (loading .env first).

        Unset variables keep the dataclass defaults. The digest chat falls
        back to the alert chat when not configured separately.
        """
        _load_env()
        cfg = cls()
        cfg.telegram_token = os.getenv("TELEGRAM_BOT_TOKEN")
        cfg.alert_chat_id = os.getenv("OBSERVATORY_ALERT_CHAT_ID")
        cfg.digest_chat_id = os.getenv("OBSERVATORY_DIGEST_CHAT_ID") or cfg.alert_chat_id
        cfg.poll_interval = int(os.getenv("OBSERVATORY_POLL_INTERVAL", 60))
        db_override = os.getenv("OBSERVATORY_DB_PATH")
        if db_override:
            cfg.db_path = Path(db_override)
        cfg.disk_warn_pct = float(os.getenv("OBSERVATORY_DISK_WARN_PCT", 80))
        cfg.disk_crit_pct = float(os.getenv("OBSERVATORY_DISK_CRIT_PCT", 90))
        cfg.mem_warn_pct = float(os.getenv("OBSERVATORY_MEM_WARN_PCT", 80))
        cfg.mem_crit_pct = float(os.getenv("OBSERVATORY_MEM_CRIT_PCT", 90))
        cfg.cpu_warn_pct = float(os.getenv("OBSERVATORY_CPU_WARN_PCT", 80))
        cfg.cpu_crit_pct = float(os.getenv("OBSERVATORY_CPU_CRIT_PCT", 95))
        cfg.webhook_url = os.getenv("OBSERVATORY_WEBHOOK_URL", "http://127.0.0.1:8080/health")
        cfg.api_url = os.getenv("OBSERVATORY_API_URL", "http://127.0.0.1:8642/health")
        cfg.webhook_latency_slo_ms = float(os.getenv("OBSERVATORY_WEBHOOK_LATENCY_SLO_MS", 2000))
        cfg.gateway_uptime_slo_pct = float(os.getenv("OBSERVATORY_GATEWAY_UPTIME_SLO_PCT", 99.5))
        return cfg
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Health check models
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass
class CheckResult:
    """Outcome of a single health check."""

    name: str
    status: str  # "ok" | "warn" | "critical" | "error"
    message: str
    value: Optional[float] = None  # numeric reading, when the check has one
    unit: Optional[str] = None  # unit for `value`, e.g. "ms", "%", "KB"
    extra: Dict[str, Any] = field(default_factory=dict)  # check-specific details
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class HealthSnapshot:
    """A timestamped collection of check results from one monitoring pass."""

    ts: str  # ISO8601 UTC timestamp of the pass
    checks: List[CheckResult] = field(default_factory=list)

    @property
    def overall_status(self) -> str:
        """Worst status across all checks.

        "critical" and "error" check statuses both roll up to "critical";
        otherwise any "warn" yields "warn"; an empty check list is "ok".
        """
        seen = {check.status for check in self.checks}
        if seen & {"critical", "error"}:
            return "critical"
        return "warn" if "warn" in seen else "ok"

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict (checks become plain dicts)."""
        return {
            "ts": self.ts,
            "overall": self.overall_status,
            "checks": [asdict(check) for check in self.checks],
        }
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Individual health checks
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def check_gateway_liveness() -> CheckResult:
    """Report whether the Hermes gateway process is running.

    gateway.status is imported lazily so an unavailable module (or any
    probe failure) degrades to an "error" result instead of raising.
    """
    try:
        from gateway.status import is_gateway_running, get_running_pid

        running = is_gateway_running()
        pid = get_running_pid()
        if not running:
            return CheckResult(
                name="gateway_process",
                status="critical",
                message="Gateway process is NOT running",
            )
        return CheckResult(
            name="gateway_process",
            status="ok",
            message=f"Gateway running (pid={pid})",
            value=float(pid) if pid else None,
        )
    except Exception as exc:
        return CheckResult(
            name="gateway_process",
            status="error",
            message=f"Could not determine gateway status: {exc}",
        )
|
||||||
|
|
||||||
|
|
||||||
|
def check_api_server_http(cfg: ObservatoryConfig) -> CheckResult:
    """Probe the API server's /health endpoint over HTTP.

    Status mapping:
      * ok       — HTTP < 400 and latency within the SLO
      * warn     — HTTP < 400 but over the latency SLO, or connection
                   refused/reset (server presumably just not started)
      * critical — HTTP >= 400
      * error    — any other probe failure

    NOTE(review): latency is compared against cfg.webhook_latency_slo_ms —
    the config defines no separate API-server latency SLO field; confirm
    this sharing is intentional.
    """
    url = cfg.api_url
    # monotonic() so the latency measurement is immune to clock changes.
    start = time.monotonic()
    try:
        req = urllib.request.Request(url, method="GET")
        req.add_header("User-Agent", "hermes-observatory/1.0")
        with urllib.request.urlopen(req, timeout=10) as resp:
            latency_ms = (time.monotonic() - start) * 1000
            # Bounded read: keep at most 512 bytes of the response body.
            body = resp.read(512).decode("utf-8", errors="replace")
            status_code = resp.status
            if status_code < 400:
                slo_ok = latency_ms <= cfg.webhook_latency_slo_ms
                return CheckResult(
                    name="api_server_http",
                    status="ok" if slo_ok else "warn",
                    message=f"API server OK ({latency_ms:.0f}ms){'' if slo_ok else ' — exceeds latency SLO'}",
                    value=latency_ms,
                    unit="ms",
                    extra={"status_code": status_code, "body_preview": body[:100]},
                )
            return CheckResult(
                name="api_server_http",
                status="critical",
                message=f"API server returned HTTP {status_code}",
                value=latency_ms,
                unit="ms",
            )
    except urllib.error.URLError as exc:
        latency_ms = (time.monotonic() - start) * 1000
        # Not running is acceptable if gateway is not configured for API
        reason = str(exc.reason) if hasattr(exc, "reason") else str(exc)
        if "Connection refused" in reason or "Connection reset" in reason:
            return CheckResult(
                name="api_server_http",
                status="warn",
                message=f"API server not reachable at {url} (not started?)",
                value=latency_ms,
                unit="ms",
            )
        return CheckResult(
            name="api_server_http",
            status="error",
            message=f"API server probe error: {exc}",
            value=latency_ms,
            unit="ms",
        )
    except Exception as exc:
        latency_ms = (time.monotonic() - start) * 1000
        return CheckResult(
            name="api_server_http",
            status="error",
            message=f"API server probe exception: {exc}",
            value=latency_ms,
            unit="ms",
        )
|
||||||
|
|
||||||
|
|
||||||
|
def check_webhook_http(cfg: ObservatoryConfig) -> CheckResult:
    """Probe the webhook /health endpoint over HTTP.

    Status mapping mirrors check_api_server_http:
      * ok       — HTTP < 400 and latency within cfg.webhook_latency_slo_ms
      * warn     — HTTP < 400 but over the latency SLO, or connection
                   refused/reset (endpoint presumably not started)
      * critical — HTTP >= 400
      * error    — any other probe failure
    """
    url = cfg.webhook_url
    # monotonic() so the latency measurement is immune to clock changes.
    start = time.monotonic()
    try:
        req = urllib.request.Request(url, method="GET")
        req.add_header("User-Agent", "hermes-observatory/1.0")
        with urllib.request.urlopen(req, timeout=10) as resp:
            latency_ms = (time.monotonic() - start) * 1000
            status_code = resp.status
            slo_ok = latency_ms <= cfg.webhook_latency_slo_ms
            if status_code < 400:
                return CheckResult(
                    name="webhook_http",
                    status="ok" if slo_ok else "warn",
                    message=f"Webhook OK ({latency_ms:.0f}ms){'' if slo_ok else ' — exceeds latency SLO'}",
                    value=latency_ms,
                    unit="ms",
                    extra={"status_code": status_code},
                )
            return CheckResult(
                name="webhook_http",
                status="critical",
                message=f"Webhook returned HTTP {status_code}",
                value=latency_ms,
                unit="ms",
            )
    except urllib.error.URLError as exc:
        latency_ms = (time.monotonic() - start) * 1000
        reason = str(exc.reason) if hasattr(exc, "reason") else str(exc)
        if "Connection refused" in reason or "Connection reset" in reason:
            return CheckResult(
                name="webhook_http",
                status="warn",
                message=f"Webhook not reachable at {url} (not started?)",
                value=latency_ms,
                unit="ms",
            )
        return CheckResult(
            name="webhook_http",
            status="error",
            message=f"Webhook probe error: {exc}",
            value=latency_ms,
            unit="ms",
        )
    except Exception as exc:
        latency_ms = (time.monotonic() - start) * 1000
        return CheckResult(
            name="webhook_http",
            status="error",
            message=f"Webhook probe exception: {exc}",
            value=latency_ms,
            unit="ms",
        )
|
||||||
|
|
||||||
|
|
||||||
|
def check_disk(cfg: ObservatoryConfig) -> CheckResult:
    """Report disk usage for the filesystem holding HERMES_HOME.

    Falls back to "/" when the HERMES_HOME directory does not exist yet.
    cfg.disk_warn_pct / cfg.disk_crit_pct select the status; requires
    psutil, otherwise an "error" result is returned.
    """
    if not _PSUTIL:
        return CheckResult(name="disk", status="error", message="psutil not installed")
    try:
        home_dir = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
        target = str(home_dir) if home_dir.exists() else "/"
        usage = psutil.disk_usage(target)
        used_pct = usage.percent
        free_gb = usage.free / (1024 ** 3)
        if used_pct >= cfg.disk_crit_pct:
            level = "critical"
        elif used_pct >= cfg.disk_warn_pct:
            level = "warn"
        else:
            level = "ok"
        return CheckResult(
            name="disk",
            status=level,
            message=f"Disk {used_pct:.1f}% used ({free_gb:.1f}GB free)",
            value=used_pct,
            unit="%",
            extra={"free_bytes": usage.free, "total_bytes": usage.total},
        )
    except Exception as exc:
        return CheckResult(name="disk", status="error", message=f"Disk check error: {exc}")
|
||||||
|
|
||||||
|
|
||||||
|
def check_memory(cfg: ObservatoryConfig) -> CheckResult:
    """Report system-wide virtual memory usage.

    cfg.mem_warn_pct / cfg.mem_crit_pct select the status; requires
    psutil, otherwise an "error" result is returned.
    """
    if not _PSUTIL:
        return CheckResult(name="memory", status="error", message="psutil not installed")
    try:
        vm = psutil.virtual_memory()
        used_pct = vm.percent
        avail_gb = vm.available / (1024 ** 3)
        level = (
            "critical" if used_pct >= cfg.mem_crit_pct
            else "warn" if used_pct >= cfg.mem_warn_pct
            else "ok"
        )
        return CheckResult(
            name="memory",
            status=level,
            message=f"Memory {used_pct:.1f}% used ({avail_gb:.1f}GB available)",
            value=used_pct,
            unit="%",
            extra={"available_bytes": vm.available, "total_bytes": vm.total},
        )
    except Exception as exc:
        return CheckResult(name="memory", status="error", message=f"Memory check error: {exc}")
|
||||||
|
|
||||||
|
|
||||||
|
def check_cpu(cfg: ObservatoryConfig) -> CheckResult:
    """Report CPU utilisation sampled over one second.

    psutil.cpu_percent(interval=1) blocks for the sampling interval, so a
    call takes about a second. cfg.cpu_warn_pct / cfg.cpu_crit_pct select
    the status; requires psutil, otherwise an "error" result is returned.
    """
    if not _PSUTIL:
        return CheckResult(name="cpu", status="error", message="psutil not installed")
    try:
        load_pct = psutil.cpu_percent(interval=1)
        level = (
            "critical" if load_pct >= cfg.cpu_crit_pct
            else "warn" if load_pct >= cfg.cpu_warn_pct
            else "ok"
        )
        return CheckResult(
            name="cpu",
            status=level,
            message=f"CPU {load_pct:.1f}%",
            value=load_pct,
            unit="%",
        )
    except Exception as exc:
        return CheckResult(name="cpu", status="error", message=f"CPU check error: {exc}")
|
||||||
|
|
||||||
|
|
||||||
|
def check_database(cfg: ObservatoryConfig) -> CheckResult:
    """Verify the observatory SQLite DB is present and queryable.

    Returns "warn" when the DB has not been created yet, "ok" (with the
    file size in KB) when the probe query succeeds, and "error" when the
    DB cannot be opened or queried.
    """
    db_path = cfg.db_path
    try:
        if not db_path.exists():
            return CheckResult(
                name="database",
                status="warn",
                message=f"Observatory DB not yet created at {db_path}",
            )
        size_kb = db_path.stat().st_size / 1024
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            # Cheap probe: also confirms the schema has been initialised.
            conn.execute("SELECT count(*) FROM health_snapshots").fetchone()
        finally:
            # FIX: previously the connection leaked when the query raised
            # (close() was only reached on success).
            conn.close()
        return CheckResult(
            name="database",
            status="ok",
            message=f"Observatory DB OK ({size_kb:.1f}KB)",
            value=size_kb,
            unit="KB",
            extra={"path": str(db_path)},
        )
    except Exception as exc:
        return CheckResult(
            name="database",
            status="error",
            message=f"DB check error: {exc}",
        )
|
||||||
|
|
||||||
|
|
||||||
|
def check_response_store_db() -> CheckResult:
    """Check the API server's SQLite response store, if it exists.

    A missing DB file is "ok" — it just means the API server has never
    stored a response. Otherwise reports the row count and file size, or
    "error" when the DB cannot be opened or queried.
    """
    try:
        hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
        db_path = hermes_home / "response_store.db"
        if not db_path.exists():
            return CheckResult(
                name="response_store_db",
                status="ok",
                message="Response store DB not present (API server not yet used)",
            )
        size_kb = db_path.stat().st_size / 1024
        conn = sqlite3.connect(str(db_path), timeout=5)
        try:
            count = conn.execute("SELECT count(*) FROM responses").fetchone()[0]
        finally:
            # FIX: close even when the query raises, so the handle never
            # leaks (previously close() was only reached on success).
            conn.close()
        return CheckResult(
            name="response_store_db",
            status="ok",
            message=f"Response store DB OK ({count} responses, {size_kb:.1f}KB)",
            value=size_kb,
            unit="KB",
        )
    except Exception as exc:
        return CheckResult(
            name="response_store_db",
            status="error",
            message=f"Response store DB error: {exc}",
        )
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Snapshot collector
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def collect_snapshot(cfg: ObservatoryConfig) -> HealthSnapshot:
    """Run every health check once and bundle the results.

    Note: check_cpu blocks for ~1s (its sampling interval), so a full pass
    takes at least that long. Each check handles its own failures, so this
    never raises from an individual probe.
    """
    ts = datetime.now(timezone.utc).isoformat()
    checks = [
        check_gateway_liveness(),
        check_api_server_http(cfg),
        check_webhook_http(cfg),
        check_disk(cfg),
        check_memory(cfg),
        check_cpu(cfg),
        check_database(cfg),
        check_response_store_db(),
    ]
    return HealthSnapshot(ts=ts, checks=checks)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# SQLite persistence
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def _db(path: Path):
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
conn = sqlite3.connect(str(path), timeout=10)
|
||||||
|
conn.execute("PRAGMA journal_mode=WAL")
|
||||||
|
conn.execute("PRAGMA foreign_keys=ON")
|
||||||
|
try:
|
||||||
|
yield conn
|
||||||
|
conn.commit()
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _init_db(path: Path) -> None:
    """Create the observatory schema if missing (idempotent).

    Tables:
      * health_snapshots — one row per monitoring pass (JSON payload),
        indexed by ts for time-range pruning/queries.
      * alerts_sent — audit log of dispatched alerts, indexed by ts.
    """
    with _db(path) as conn:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS health_snapshots (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                ts TEXT NOT NULL,
                overall TEXT NOT NULL,
                payload TEXT NOT NULL
            )
        """)
        conn.execute("CREATE INDEX IF NOT EXISTS idx_snapshots_ts ON health_snapshots(ts)")
        conn.execute("""
            CREATE TABLE IF NOT EXISTS alerts_sent (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                ts TEXT NOT NULL,
                check_name TEXT NOT NULL,
                status TEXT NOT NULL,
                message TEXT NOT NULL
            )
        """)
        conn.execute("CREATE INDEX IF NOT EXISTS idx_alerts_ts ON alerts_sent(ts)")
|
||||||
|
|
||||||
|
|
||||||
|
def store_snapshot(cfg: ObservatoryConfig, snapshot: HealthSnapshot) -> None:
    """Persist *snapshot* as JSON and prune rows past the retention window."""
    _init_db(cfg.db_path)
    serialized = json.dumps(snapshot.to_dict())
    oldest_kept = (datetime.now(timezone.utc) - timedelta(days=RETENTION_DAYS)).isoformat()
    with _db(cfg.db_path) as conn:
        conn.execute(
            "INSERT INTO health_snapshots (ts, overall, payload) VALUES (?, ?, ?)",
            (snapshot.ts, snapshot.overall_status, serialized),
        )
        # Keep the DB bounded: drop anything older than RETENTION_DAYS.
        conn.execute("DELETE FROM health_snapshots WHERE ts < ?", (oldest_kept,))
|
||||||
|
|
||||||
|
|
||||||
|
def record_alert_sent(cfg: ObservatoryConfig, check_name: str, status: str, message: str) -> None:
    """Append one row to the alerts_sent audit log (timestamped now, UTC)."""
    _init_db(cfg.db_path)
    row = (datetime.now(timezone.utc).isoformat(), check_name, status, message)
    with _db(cfg.db_path) as conn:
        conn.execute(
            "INSERT INTO alerts_sent (ts, check_name, status, message) VALUES (?, ?, ?, ?)",
            row,
        )
|
||||||
|
|
||||||
|
|
||||||
|
def load_snapshots(cfg: ObservatoryConfig, days: int = RETENTION_DAYS) -> List[Dict[str, Any]]:
    """Return stored snapshot payloads from the last *days* days, newest first.

    Yields the deserialized JSON payload dicts (not HealthSnapshot objects).
    A missing DB file simply means no history, so an empty list is returned.
    """
    if not cfg.db_path.exists():
        return []
    since = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()
    query = "SELECT ts, overall, payload FROM health_snapshots WHERE ts >= ? ORDER BY ts DESC"
    with _db(cfg.db_path) as conn:
        rows = conn.execute(query, (since,)).fetchall()
    return [json.loads(payload) for _, _, payload in rows]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Alerting
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _telegram_send(token: str, chat_id: str, text: str) -> bool:
    """POST *text* to a Telegram chat via the Bot API; True on success.

    Sends HTML-formatted text with link previews disabled. Any failure is
    logged at WARNING level and reported as False — never raised.
    """
    endpoint = f"https://api.telegram.org/bot{token}/sendMessage"
    body = {
        "chat_id": chat_id,
        "text": text,
        "parse_mode": "HTML",
        "disable_web_page_preview": True,
    }
    request = urllib.request.Request(
        endpoint, data=json.dumps(body).encode("utf-8"), method="POST"
    )
    request.add_header("Content-Type", "application/json")
    request.add_header("User-Agent", "hermes-observatory/1.0")
    try:
        with urllib.request.urlopen(request, timeout=15) as resp:
            reply = json.loads(resp.read())
        return bool(reply.get("ok"))
    except Exception as exc:
        logger.warning("Telegram send failed: %s", exc)
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def _status_emoji(status: str) -> str:
|
||||||
|
return {"ok": "✅", "warn": "⚠️", "critical": "🔴", "error": "❌"}.get(status, "❓")
|
||||||
|
|
||||||
|
|
||||||
|
def maybe_alert(cfg: ObservatoryConfig, snapshot: HealthSnapshot, prev_snapshot: Optional[HealthSnapshot]) -> List[str]:
    """
    Fire Telegram alerts for status transitions between two snapshots.

    Alerts are edge-triggered against *prev_snapshot*:
      * a check newly entering "critical"/"error" (previously ok/warn or
        unseen) sends a degradation alert;
      * a check returning to "ok" from "critical"/"error" sends a recovery
        alert. "warn" transitions never alert.
    Each successful send is recorded via record_alert_sent. Returns the
    list of messages actually delivered; empty when Telegram is not
    configured.
    """
    if not cfg.telegram_token or not cfg.alert_chat_id:
        return []

    alerts_sent = []
    prev_statuses: Dict[str, str] = {}
    if prev_snapshot:
        for c in prev_snapshot.checks:
            prev_statuses[c.name] = c.status

    for check in snapshot.checks:
        if check.status in ("critical", "error"):
            # A check absent from the previous pass defaults to "ok", so a
            # first-ever failure still alerts.
            prev = prev_statuses.get(check.name, "ok")
            if prev not in ("critical", "error"):
                # Newly degraded — alert
                emoji = _status_emoji(check.status)
                msg = (
                    f"{emoji} <b>Hermes Observatory Alert</b>\n\n"
                    f"<b>Check:</b> {check.name}\n"
                    f"<b>Status:</b> {check.status.upper()}\n"
                    f"<b>Message:</b> {check.message}\n"
                    f"<b>Time:</b> {snapshot.ts}"
                )
                # Only record/report the alert if the send actually succeeded.
                if _telegram_send(cfg.telegram_token, cfg.alert_chat_id, msg):
                    alerts_sent.append(msg)
                    record_alert_sent(cfg, check.name, check.status, check.message)
                    logger.info("Alert sent for %s (%s)", check.name, check.status)
        elif check.status == "ok":
            # No default here: an unseen check recovering is not a transition.
            prev = prev_statuses.get(check.name)
            if prev in ("critical", "error"):
                # Recovery alert
                msg = (
                    f"✅ <b>Hermes Observatory — Recovery</b>\n\n"
                    f"<b>Check:</b> {check.name} has recovered\n"
                    f"<b>Message:</b> {check.message}\n"
                    f"<b>Time:</b> {snapshot.ts}"
                )
                if _telegram_send(cfg.telegram_token, cfg.alert_chat_id, msg):
                    alerts_sent.append(msg)
                    record_alert_sent(cfg, check.name, "recovery", check.message)

    return alerts_sent
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Daily digest
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def build_digest(cfg: ObservatoryConfig) -> str:
    """Build an HTML-formatted daily health digest from the last 24h of
    stored snapshots.

    Aggregates: overall-status counts, per-check degradation counts, and
    p95 latencies for every check that reported an "ms" value. SLO lines
    compare gateway uptime and webhook/API p95 latency against the
    configured targets.
    """
    snapshots = load_snapshots(cfg, days=1)
    total = len(snapshots)
    if total == 0:
        return "No health data available for the last 24 hours."

    # Count by overall status
    status_counts: Dict[str, int] = {"ok": 0, "warn": 0, "critical": 0, "error": 0}
    check_degraded_counts: Dict[str, int] = {}
    latencies: Dict[str, List[float]] = {}

    for snap in snapshots:
        overall = snap.get("overall", "ok")
        status_counts[overall] = status_counts.get(overall, 0) + 1
        for check in snap.get("checks", []):
            name = check["name"]
            status = check["status"]
            if status in ("critical", "error", "warn"):
                check_degraded_counts[name] = check_degraded_counts.get(name, 0) + 1
            value = check.get("value")
            unit = check.get("unit")
            # Collect per-check latency series for the p95 computations below.
            if value is not None and unit == "ms":
                if name not in latencies:
                    latencies[name] = []
                latencies[name].append(float(value))

    # NOTE(review): uptime_pct is computed but never used below.
    uptime_pct = 100.0 * status_counts["ok"] / total if total else 0.0
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

    lines = [
        f"📊 <b>Hermes Observatory — Daily Digest</b>",
        f"<b>Generated:</b> {now}",
        f"",
        f"<b>Last 24h Summary</b> ({total} samples)",
        f" Healthy: {status_counts['ok']} ({100*status_counts['ok']//total if total else 0}%)",
        f" Warning: {status_counts.get('warn', 0)}",
        f" Critical: {status_counts.get('critical', 0)}",
        f" Error: {status_counts.get('error', 0)}",
        f"",
    ]

    # SLO status
    lines.append("<b>SLO Status</b>")
    gw_uptime_target = cfg.gateway_uptime_slo_pct
    # Uptime = fraction of snapshots where gateway_process was "ok".
    gw_snapshots = [
        s for s in snapshots
        if any(c["name"] == "gateway_process" and c["status"] == "ok" for c in s.get("checks", []))
    ]
    gw_uptime = 100.0 * len(gw_snapshots) / total if total else 0.0
    gw_ok = gw_uptime >= gw_uptime_target
    lines.append(
        f" {'✅' if gw_ok else '❌'} Gateway uptime: {gw_uptime:.1f}% (target: ≥{gw_uptime_target}%)"
    )

    wh_latency_target = cfg.webhook_latency_slo_ms
    if "webhook_http" in latencies and latencies["webhook_http"]:
        wh_vals = sorted(latencies["webhook_http"])
        # Nearest-rank p95, clamped to the last element for tiny samples.
        p95_idx = int(len(wh_vals) * 0.95)
        p95 = wh_vals[min(p95_idx, len(wh_vals) - 1)]
        wh_ok = p95 <= wh_latency_target
        lines.append(
            f" {'✅' if wh_ok else '❌'} Webhook p95 latency: {p95:.0f}ms (target: ≤{wh_latency_target:.0f}ms)"
        )
    else:
        lines.append(f" ⚫ Webhook latency: no data")

    if "api_server_http" in latencies and latencies["api_server_http"]:
        api_vals = sorted(latencies["api_server_http"])
        p95_idx = int(len(api_vals) * 0.95)
        p95 = api_vals[min(p95_idx, len(api_vals) - 1)]
        # NOTE(review): API latency reuses the webhook SLO target — the
        # config has no separate API latency SLO; confirm intentional.
        api_ok = p95 <= wh_latency_target
        lines.append(
            f" {'✅' if api_ok else '❌'} API server p95 latency: {p95:.0f}ms (target: ≤{wh_latency_target:.0f}ms)"
        )

    # Top degraded checks
    if check_degraded_counts:
        lines.append("")
        lines.append("<b>Degraded Checks (24h)</b>")
        for name, count in sorted(check_degraded_counts.items(), key=lambda x: -x[1]):
            pct = 100 * count // total if total else 0
            lines.append(f" • {name}: {count} incidents ({pct}%)")

    lines.append("")
    lines.append(f"<i>Observatory DB: {cfg.db_path}</i>")

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def send_digest(cfg: ObservatoryConfig) -> bool:
    """Build the daily digest and deliver it via Telegram.

    Returns True only when the message was actually sent; False when the
    Telegram token/chat are not configured or the send failed. The digest
    is always built (even when sending is skipped), matching the previous
    behavior.
    """
    text = build_digest(cfg)
    if not (cfg.telegram_token and cfg.digest_chat_id):
        return False
    return _telegram_send(cfg.telegram_token, cfg.digest_chat_id, text)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Display helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# ANSI escape sequences used to colorize statuses in terminal output
# (see _color_status / print_snapshot).
_STATUS_COLORS = {
    "ok": "\033[32m",  # green
    "warn": "\033[33m",  # yellow
    "critical": "\033[31m",  # red
    "error": "\033[91m",  # bright red
}
# Resets the foreground color back to the terminal default.
_RESET = "\033[0m"
|
||||||
|
|
||||||
|
|
||||||
|
def _color_status(status: str) -> str:
    """Return *status* upper-cased and wrapped in its ANSI color.

    Unknown statuses get no color prefix but still gain the reset suffix,
    matching the original behavior.
    """
    prefix = _STATUS_COLORS.get(status, "")
    return "".join((prefix, status.upper(), _RESET))
|
||||||
|
|
||||||
|
|
||||||
|
def print_snapshot(snapshot: HealthSnapshot) -> None:
    """Pretty-print one snapshot to stdout with ANSI colors and emoji."""
    overall_color = _STATUS_COLORS.get(snapshot.overall_status, "")
    print(f"\n{'='*60}")
    print(f" Hermes Observatory — {snapshot.ts}")
    print(f" Overall: {overall_color}{snapshot.overall_status.upper()}{_RESET}")
    print(f"{'='*60}")
    for check in snapshot.checks:
        emoji = _status_emoji(check.status)
        # Only show the numeric reading when both value and unit are present.
        val_str = f" [{check.value:.1f}{check.unit}]" if check.value is not None and check.unit else ""
        # NOTE(review): the :<15 pad counts the invisible ANSI escape bytes
        # emitted by _color_status, so columns may not align exactly —
        # confirm whether alignment matters here.
        print(f" {emoji} {check.name:<25} {_color_status(check.status):<15} {check.message}{val_str}")
    print()
|
||||||
|
|
||||||
|
|
||||||
|
def print_slo_report(cfg: ObservatoryConfig) -> None:
    """Print each SLO's definition, target, and 30-day actual to stdout.

    Actuals are computed from stored snapshots: gateway uptime as the
    fraction of snapshots where the gateway_process check was "ok";
    latency SLOs as the nearest-rank p95 of the recorded check values.
    """
    snapshots = load_snapshots(cfg, days=30)
    total = len(snapshots)
    print(f"\n{'='*60}")
    print(" Hermes Observatory — SLO Report (last 30 days)")
    print(f"{'='*60}")
    for slo_key, slo in SLO_DEFINITIONS.items():
        print(f"\n {slo['description']}")
        print(f" Target: {slo['target']}{slo['unit']}")
        if total == 0:
            print(f" Status: no data")
            continue
        if slo_key == "gateway_uptime_pct":
            ok_count = sum(
                1 for s in snapshots
                if any(c["name"] == "gateway_process" and c["status"] == "ok"
                       for c in s.get("checks", []))
            )
            actual = 100.0 * ok_count / total
            met = actual >= slo["target"]
            print(f" Actual: {actual:.2f}% {'✅ MET' if met else '❌ MISSED'}")
        elif slo_key in ("webhook_latency_ms", "api_server_latency_ms"):
            # FIX: was "api_server_http_latency_ms", which does not exist in
            # SLO_DEFINITIONS, so the API-server latency SLO was never
            # evaluated — its section printed a target with no result.
            check_name = "webhook_http" if "webhook" in slo_key else "api_server_http"
            vals = [
                float(c["value"])
                for s in snapshots
                for c in s.get("checks", [])
                if c["name"] == check_name and c.get("value") is not None
            ]
            if vals:
                vals.sort()
                # Nearest-rank p95, clamped to the last element.
                p95_idx = int(len(vals) * 0.95)
                p95 = vals[min(p95_idx, len(vals) - 1)]
                met = p95 <= slo["target"]
                print(f" p95: {p95:.0f}ms {'✅ MET' if met else '❌ MISSED'}")
            else:
                print(f" Status: no latency data")
    print()
|
||||||
|
|
||||||
|
|
||||||
|
def print_history(cfg: ObservatoryConfig, count: int = 20) -> None:
    """Print up to *count* of the most recent health records to stdout."""
    recent = load_snapshots(cfg, days=RETENTION_DAYS)[:count]
    if not recent:
        print("No history available.")
        return
    print(f"\n{'='*60}")
    print(f" Last {min(count, len(recent))} health records")
    print(f"{'='*60}")
    for record in recent:
        ts = record.get("ts", "?")
        overall = record.get("overall", "?")
        failing = [c["name"] for c in record.get("checks", []) if c["status"] != "ok"]
        suffix = f" — issues: {', '.join(failing)}" if failing else ""
        print(f" {_status_emoji(overall)} {ts} {overall.upper()}{suffix}")
    print()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Daemon mode
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class Observatory:
    """Continuous monitoring daemon: collect, store, and alert on a timer."""

    def __init__(self, cfg: ObservatoryConfig):
        self.cfg = cfg
        # Loop flag; cleared by _handle_signal to request shutdown.
        self._running = False
        # Previous pass, used by maybe_alert for edge-triggered alerts.
        self._prev_snapshot: Optional[HealthSnapshot] = None

    def _handle_signal(self, signum: int, frame: Any) -> None:
        """SIGINT/SIGTERM handler: request a graceful loop exit."""
        logger.info("Received signal %d, shutting down...", signum)
        self._running = False

    def run_once(self) -> HealthSnapshot:
        """Perform one full pass: collect, persist, alert; return the snapshot."""
        snapshot = collect_snapshot(self.cfg)
        store_snapshot(self.cfg, snapshot)
        alerts = maybe_alert(self.cfg, snapshot, self._prev_snapshot)
        if alerts:
            logger.info("Sent %d alert(s)", len(alerts))
        # Remember this pass so the next one can detect status transitions.
        self._prev_snapshot = snapshot
        return snapshot

    def run(self) -> None:
        """Run the monitoring loop until SIGINT/SIGTERM is received.

        A failing pass is logged (with traceback) and the loop continues;
        the sleep is skipped once shutdown has been requested.
        """
        _init_db(self.cfg.db_path)
        logger.info(
            "Observatory starting — poll_interval=%ds db=%s",
            self.cfg.poll_interval,
            self.cfg.db_path,
        )
        self._running = True
        signal.signal(signal.SIGINT, self._handle_signal)
        signal.signal(signal.SIGTERM, self._handle_signal)

        while self._running:
            try:
                snapshot = self.run_once()
                logger.info("Health check: %s", snapshot.overall_status)
            except Exception as exc:
                logger.error("Health check failed: %s", exc, exc_info=True)
            if self._running:
                time.sleep(self.cfg.poll_interval)

        logger.info("Observatory stopped.")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI entry point
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def main(argv: Optional[List[str]] = None) -> int:
    """CLI entry point: dispatch to check/daemon/digest/history/SLO modes.

    Returns a process exit code: 0 on success/healthy, 1 when a one-shot
    health check reports a non-ok overall status.
    """
    parser = argparse.ArgumentParser(
        description="Hermes Observatory — health monitoring & alerting",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--check", action="store_true", help="Run one health check and print results")
    parser.add_argument("--daemon", action="store_true", help="Run as continuous monitoring daemon")
    parser.add_argument("--digest", action="store_true", help="Print (and optionally send) daily digest")
    parser.add_argument("--history", type=int, metavar="N", help="Show last N health records")
    parser.add_argument("--slo", action="store_true", help="Print SLO report")
    parser.add_argument("--send-digest", action="store_true", help="Send daily digest via Telegram")
    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
    args = parser.parse_args(argv)

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(asctime)s %(levelname)s [observatory] %(message)s",
    )

    cfg = ObservatoryConfig.from_env()
    _init_db(cfg.db_path)

    def one_shot() -> int:
        # Shared by --check and the no-flag default path.
        snap = collect_snapshot(cfg)
        store_snapshot(cfg, snap)
        print_snapshot(snap)
        return 0 if snap.overall_status == "ok" else 1

    if args.check:
        return one_shot()

    if args.daemon:
        Observatory(cfg).run()
        return 0

    if args.digest or args.send_digest:
        print(build_digest(cfg))
        if args.send_digest:
            if send_digest(cfg):
                print("\n[Digest sent to Telegram]")
            else:
                print("\n[Telegram send skipped — token/chat_id not configured]")
        return 0

    if args.history is not None:
        print_history(cfg, args.history)
        return 0

    if args.slo:
        print_slo_report(cfg)
        return 0

    # Default: one-shot check
    return one_shot()
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: the process exit status mirrors main()'s return code.
if __name__ == "__main__":
    sys.exit(main())
|
||||||
@@ -42,6 +42,7 @@ dependencies = [
|
|||||||
modal = ["modal>=1.0.0,<2"]
|
modal = ["modal>=1.0.0,<2"]
|
||||||
daytona = ["daytona>=0.148.0,<1"]
|
daytona = ["daytona>=0.148.0,<1"]
|
||||||
dev = ["pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2"]
|
dev = ["pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2"]
|
||||||
|
observatory = ["psutil>=5.9.0,<7"]
|
||||||
messaging = ["python-telegram-bot>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
|
messaging = ["python-telegram-bot>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
|
||||||
cron = ["croniter>=6.0.0,<7"]
|
cron = ["croniter>=6.0.0,<7"]
|
||||||
slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
|
slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
|
||||||
|
|||||||
16
run_agent.py
16
run_agent.py
@@ -5390,22 +5390,6 @@ class AIAgent:
|
|||||||
independent: read-only tools may always share the parallel path, while
|
independent: read-only tools may always share the parallel path, while
|
||||||
file reads/writes may do so only when their target paths do not overlap.
|
file reads/writes may do so only when their target paths do not overlap.
|
||||||
"""
|
"""
|
||||||
# ── Pre-tool-check: Stop Protocol gate ─────────────────────────────
|
|
||||||
try:
|
|
||||||
from agent.stop_protocol import StopProtocol
|
|
||||||
stop_protocol = StopProtocol()
|
|
||||||
if stop_protocol.enforce(messages):
|
|
||||||
for tc in assistant_message.tool_calls or []:
|
|
||||||
messages.append({
|
|
||||||
"role": "tool",
|
|
||||||
"content": StopProtocol.build_cancelled_result(tc.function.name),
|
|
||||||
"tool_call_id": tc.id,
|
|
||||||
})
|
|
||||||
return
|
|
||||||
except Exception:
|
|
||||||
# Fail open — never let the stop protocol crash block normal execution
|
|
||||||
pass
|
|
||||||
|
|
||||||
tool_calls = assistant_message.tool_calls
|
tool_calls = assistant_message.tool_calls
|
||||||
|
|
||||||
# Allow _vprint during tool execution even with stream consumers
|
# Allow _vprint during tool execution even with stream consumers
|
||||||
|
|||||||
371
scripts/deploy-validate
Executable file
371
scripts/deploy-validate
Executable file
@@ -0,0 +1,371 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
deploy-validate — pre-flight configuration checker for Hermes deployments.
|
||||||
|
|
||||||
|
Catches common configuration errors BEFORE they cause runtime failures.
|
||||||
|
Safe to run at any time: it only reads files and makes lightweight network
|
||||||
|
checks — it never writes state or sends messages.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/deploy-validate # validate current environment
|
||||||
|
python scripts/deploy-validate --dry-run # alias for the same thing
|
||||||
|
python scripts/deploy-validate --env /path/to/.env
|
||||||
|
|
||||||
|
Exit codes:
|
||||||
|
0 All checks passed (or only warnings).
|
||||||
|
1 One or more blocking errors found.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
import sys
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
RESET = "\033[0m"
|
||||||
|
RED = "\033[91m"
|
||||||
|
YELLOW = "\033[93m"
|
||||||
|
GREEN = "\033[92m"
|
||||||
|
BOLD = "\033[1m"
|
||||||
|
|
||||||
|
|
||||||
|
def _color(text: str, code: str) -> str:
|
||||||
|
if sys.stdout.isatty():
|
||||||
|
return f"{code}{text}{RESET}"
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
def ok(msg: str) -> None:
|
||||||
|
print(f" {_color('✔', GREEN)} {msg}")
|
||||||
|
|
||||||
|
|
||||||
|
def warn(msg: str) -> None:
|
||||||
|
print(f" {_color('⚠', YELLOW)} {msg}")
|
||||||
|
|
||||||
|
|
||||||
|
def error(msg: str) -> None:
|
||||||
|
print(f" {_color('✘', RED)} {msg}")
|
||||||
|
|
||||||
|
|
||||||
|
def section(title: str) -> None:
|
||||||
|
print(f"\n{_color(BOLD + title, BOLD)}")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# .env loader (minimal — avoids dependency on python-dotenv for portability)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _load_env_file(path: Path) -> dict[str, str]:
|
||||||
|
"""Parse a .env file and return a dict of key→value pairs."""
|
||||||
|
result: dict[str, str] = {}
|
||||||
|
if not path.exists():
|
||||||
|
return result
|
||||||
|
for line in path.read_text(encoding="utf-8").splitlines():
|
||||||
|
line = line.strip()
|
||||||
|
if not line or line.startswith("#") or "=" not in line:
|
||||||
|
continue
|
||||||
|
key, _, value = line.partition("=")
|
||||||
|
key = key.strip()
|
||||||
|
# Strip inline comments and surrounding quotes.
|
||||||
|
value = value.split("#")[0].strip().strip("\"'")
|
||||||
|
if key:
|
||||||
|
result[key] = value
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Individual checks
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def check_env_file(env_path: Path) -> dict[str, str]:
    """Report on the .env file and return its parsed contents ({} if absent)."""
    section("Environment file")
    if not env_path.exists():
        error(f".env not found at {env_path}")
        error("Copy .env.example → .env and fill in your API keys.")
        return {}
    ok(f".env found at {env_path}")

    parsed = _load_env_file(env_path)

    # Flag values that still look like unedited template placeholders.
    suspicious = ("your_", "xxxx", "changeme", "todo", "replace_me")
    for name, val in parsed.items():
        if val and any(token in val.lower() for token in suspicious):
            warn(f"{name} looks like a placeholder: {val!r}")

    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def check_llm_key(env: dict[str, str]) -> bool:
    """Return True when at least one known LLM provider key is configured."""
    section("LLM provider")
    providers = {
        "OPENROUTER_API_KEY": "OpenRouter",
        "ANTHROPIC_API_KEY": "Anthropic",
        "OPENAI_API_KEY": "OpenAI",
        "GLM_API_KEY": "z.ai / GLM",
        "KIMI_API_KEY": "Kimi / Moonshot",
        "MINIMAX_API_KEY": "MiniMax",
        "NOUS_API_KEY": "Nous Portal",
        "HF_TOKEN": "Hugging Face",
        "KILOCODE_API_KEY": "KiloCode",
        "OPENCODE_ZEN_API_KEY": "OpenCode Zen",
    }
    configured = [label for key, label in providers.items() if env.get(key, "").strip()]
    if not configured:
        error("No LLM API key detected. Set at least one (e.g. OPENROUTER_API_KEY).")
        return False
    ok(f"LLM provider key present: {', '.join(configured)}")
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def check_hermes_home(env: dict[str, str]) -> Optional[Path]:
    """Resolve the HERMES_HOME data directory and sanity-check its layout.

    Precedence: .env value, then process environment, then ~/.hermes.
    A missing directory is only a warning — it is created on first run.
    """
    section("HERMES_HOME data directory")
    configured = env.get("HERMES_HOME") or os.environ.get("HERMES_HOME") or ""
    home = Path(configured).expanduser() if configured else Path.home() / ".hermes"

    if not home.exists():
        warn(f"HERMES_HOME does not exist yet: {home} (will be created on first run)")
        return home

    ok(f"HERMES_HOME exists: {home}")

    # Runtime-created subdirectories; their absence is informational only.
    for sub in ("logs", "sessions", "cron", "memories", "skills"):
        if not (home / sub).is_dir():
            warn(f"Expected subdirectory missing: {home / sub} (created automatically at runtime)")

    if (home / ".env").exists():
        ok(f"Data-directory .env present: {home / '.env'}")
    else:
        warn(f"No .env in HERMES_HOME ({home}). "
             "The Docker entrypoint copies .env.example on first run; "
             "for bare-metal installs copy it manually.")

    return home
|
||||||
|
|
||||||
|
|
||||||
|
def check_gateway_platforms(env: dict[str, str]) -> None:
    """Report which messaging platforms have all their tokens configured."""
    section("Messaging platform tokens")
    required_keys: dict[str, list[str]] = {
        "Telegram": ["TELEGRAM_BOT_TOKEN"],
        "Discord": ["DISCORD_BOT_TOKEN"],
        "Slack": ["SLACK_BOT_TOKEN", "SLACK_APP_TOKEN"],
        "WhatsApp": [],  # pairing-based, no env key required
        "Email": ["EMAIL_ADDRESS", "EMAIL_PASSWORD"],
    }
    configured_any = False
    for name, keys in required_keys.items():
        if not keys:
            continue  # WhatsApp — no key check
        if all(env.get(k, "").strip() for k in keys):
            ok(f"{name}: configured ({', '.join(keys)})")
            configured_any = True
    if not configured_any:
        warn("No messaging platform tokens found. "
             "The gateway will start but accept no inbound messages. "
             "Set at least one platform token (e.g. TELEGRAM_BOT_TOKEN).")
|
||||||
|
|
||||||
|
|
||||||
|
def check_api_server_reachable(host: str = "127.0.0.1", port: int = 8642) -> None:
    """Probe the local API server's /health endpoint (best-effort, 5s timeout)."""
    section("API server health check")
    url = f"http://{host}:{port}/health"
    try:
        with urllib.request.urlopen(url, timeout=5) as resp:
            payload = resp.read().decode()
    except urllib.error.URLError as exc:
        # Not a hard failure — the server may not be running in --dry-run mode.
        warn(f"API server not reachable at {url}: {exc.reason} "
             "(expected if gateway is not running)")
        return
    except OSError as exc:
        warn(f"API server not reachable at {url}: {exc}")
        return

    if '"status"' in payload and "ok" in payload:
        ok(f"API server healthy: {url}")
    else:
        warn(f"Unexpected /health response from {url}: {payload[:200]}")
|
||||||
|
|
||||||
|
|
||||||
|
def check_gateway_status(hermes_home: Optional[Path]) -> None:
    """Inspect gateway PID/state files under HERMES_HOME and report status.

    Read-only: parses gateway_state.json and looks for gateway.pid; never
    touches the running process. Absence of both files is reported as
    "not started yet" rather than as an error.
    """
    section("Gateway runtime status")
    if hermes_home is None:
        warn("HERMES_HOME unknown — skipping runtime status check.")
        return

    state_file = hermes_home / "gateway_state.json"
    pid_file = hermes_home / "gateway.pid"

    if not state_file.exists() and not pid_file.exists():
        warn("Gateway does not appear to be running (no PID or state file). "
             "This is expected before the first start.")
        return

    if state_file.exists():
        import json
        try:
            state = json.loads(state_file.read_text())
            gw_state = state.get("gateway_state", "unknown")
            updated = state.get("updated_at", "?")
            if gw_state == "running":
                ok(f"Gateway state: {gw_state} (updated {updated})")
                # Per-platform sub-status: anything not clearly healthy
                # is surfaced as a warning with its recorded error message.
                platforms = state.get("platforms", {})
                for plat, pdata in platforms.items():
                    pstate = pdata.get("state", "unknown")
                    if pstate in ("connected", "running", "ok"):
                        ok(f"  Platform {plat}: {pstate}")
                    else:
                        warn(f"  Platform {plat}: {pstate} — {pdata.get('error_message', '')}")
            elif gw_state in ("stopped", "startup_failed"):
                # A recorded stop/failure is a blocking error for deployment.
                error(f"Gateway state: {gw_state} — {state.get('exit_reason', 'no reason recorded')}")
            else:
                warn(f"Gateway state: {gw_state}")
        except Exception as exc:
            # Corrupt or partially written state file — report, don't block.
            warn(f"Could not parse {state_file}: {exc}")
    else:
        warn("State file missing; only PID file found. Gateway may be starting.")
|
||||||
|
|
||||||
|
|
||||||
|
def check_docker_available() -> None:
    """Verify Docker and its compose plugin are installed.

    Only relevant for Docker-based deployments, so absence is a warning,
    not an error. The previous loop split "docker compose" back to
    "docker" and therefore checked the same binary twice without ever
    verifying the compose plugin; this version runs ``docker compose
    version`` to test the plugin for real.
    """
    import subprocess

    section("Docker / compose availability")
    if not _check_command("docker", "docker"):
        # Without the docker binary the compose plugin cannot exist either.
        warn("docker compose not found in PATH (only required for Docker deployments)")
        return
    try:
        proc = subprocess.run(
            ["docker", "compose", "version"],
            capture_output=True,
            timeout=10,
        )
    except (OSError, subprocess.TimeoutExpired):
        warn("docker compose not found in PATH (only required for Docker deployments)")
        return
    if proc.returncode == 0:
        ok("docker compose found")
    else:
        warn("docker compose not found in PATH (only required for Docker deployments)")
|
||||||
|
|
||||||
|
|
||||||
|
def _check_command(name: str, display: str) -> bool:
    """Return True when *name* resolves on PATH, printing *display* either way."""
    import shutil

    found = shutil.which(name) is not None
    if found:
        ok(f"{display} found")
    else:
        warn(f"{display} not found in PATH (only required for Docker deployments)")
    return found
|
||||||
|
|
||||||
|
|
||||||
|
def check_ports_free(ports: Optional[list[int]] = None) -> None:
    """Check that each TCP port (default: the API port 8642) is unbound.

    A successful ``connect_ex`` to 127.0.0.1 means something is already
    listening, so the API server would fail to bind to that port.

    Fix: the annotation was ``ports: list[int] = None``, which is both a
    type error and the mutable-default anti-pattern shape; it is now an
    explicit ``Optional`` with a None sentinel.
    """
    section("Port availability")
    if ports is None:
        ports = [8642]
    for port in ports:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.settimeout(1)
            # connect_ex returns 0 on success (port occupied), errno otherwise.
            if s.connect_ex(("127.0.0.1", port)) == 0:
                warn(f"Port {port} is already in use. "
                     "The API server will fail to bind unless you change its port.")
            else:
                ok(f"Port {port} is free")
|
||||||
|
|
||||||
|
|
||||||
|
def check_no_secrets_in_repo(repo_root: Path) -> None:
    """Check that secret files are gitignored and that .env is untracked.

    Read-only: inspects .gitignore text and queries ``git ls-files``;
    never modifies the repository.
    """
    section("Secret hygiene")
    # NOTE(review): `dangerous` is currently unused — the loop below checks
    # its own hard-coded subset of patterns instead.
    dangerous = [".env", "*.pem", "*.key", "id_rsa", "id_ed25519"]
    gitignore = repo_root / ".gitignore"
    if gitignore.exists():
        content = gitignore.read_text()
        for pattern in [".env", "*.pem", "*.key"]:
            # NOTE(review): the lstrip fallback reduces "*.pem" to "pem" and
            # ".env" to "env", which can match unrelated substrings anywhere
            # in .gitignore — this check is permissive and may report a
            # pattern as covered when it is not.
            if pattern in content or pattern.lstrip("*. ") in content:
                ok(f".gitignore covers {pattern}")
            else:
                warn(f".gitignore does not mention {pattern}. "
                     "Ensure secrets are never committed.")
    else:
        warn("No .gitignore found. Secrets could accidentally be committed.")

    # Check the env file itself isn't tracked.
    env_file = repo_root / ".env"
    if env_file.exists():
        import subprocess
        try:
            # `git ls-files --error-unmatch` exits 0 only when the path is tracked.
            out = subprocess.run(
                ["git", "ls-files", "--error-unmatch", ".env"],
                cwd=repo_root,
                capture_output=True,
            )
            if out.returncode == 0:
                error(".env IS tracked by git! Remove it immediately: git rm --cached .env")
            else:
                ok(".env is not tracked by git")
        except FileNotFoundError:
            warn("git not found — cannot verify .env tracking status")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Main
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def main() -> int:
    """Run all validation checks and return 0 (clean) or 1 (errors found).

    The module-level ``error()`` helper is temporarily wrapped so that
    every blocking failure reported by any check counts toward the exit
    code; warnings never affect it. The wrapper is not restored because
    this script is single-shot.

    Fixes: removed the dead ``errors_before`` sentinel and the unused
    ``llm_ok`` binding (its failure is already counted via ``error()``),
    and dropped an f-string that had no placeholders.
    """
    parser = argparse.ArgumentParser(
        description="Pre-flight configuration validator for Hermes deployments.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--dry-run", action="store_true",
        help="Alias for the default mode (no state is written regardless).",
    )
    parser.add_argument(
        "--env", metavar="PATH",
        help="Path to .env file (default: .env in repo root).",
    )
    parser.add_argument(
        "--check-ports", action="store_true",
        help="Also verify that required ports are free (useful before first start).",
    )
    parser.add_argument(
        "--skip-health", action="store_true",
        help="Skip the live /health HTTP check (use when gateway is not running).",
    )
    args = parser.parse_args()

    print(f"\n{_color(BOLD + 'Hermes Deploy Validator', BOLD)}")
    print("=" * 50)

    repo_root = Path(__file__).resolve().parent.parent
    env_path = Path(args.env) if args.env else repo_root / ".env"

    # Monkey-patch error() to count failures.
    _original_error = globals()["error"]
    error_count = 0

    def counting_error(msg: str) -> None:
        nonlocal error_count
        error_count += 1
        _original_error(msg)

    globals()["error"] = counting_error

    # Run checks.
    env = check_env_file(env_path)
    check_no_secrets_in_repo(repo_root)
    check_llm_key(env)
    hermes_home = check_hermes_home(env)
    check_gateway_platforms(env)
    if args.check_ports:
        check_ports_free()
    if not args.skip_health:
        check_api_server_reachable()
    check_gateway_status(hermes_home)

    # Summary.
    print(f"\n{'=' * 50}")
    if error_count == 0:
        print(_color("All checks passed (0 errors).", GREEN))
        return 0
    print(_color(f"{error_count} error(s) found. Fix them before deploying.", RED))
    return 1
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: the process exit status mirrors main()'s return code.
if __name__ == "__main__":
    sys.exit(main())
|
||||||
489
scripts/test_process_resilience.py
Normal file
489
scripts/test_process_resilience.py
Normal file
@@ -0,0 +1,489 @@
|
|||||||
|
"""
|
||||||
|
Verification tests for Issue #123: Process Resilience
|
||||||
|
|
||||||
|
Verifies the fixes introduced by these commits:
|
||||||
|
- d3d5b895: refactor: simplify _get_service_pids - dedupe systemd scopes, fix self-import, harden launchd parsing
|
||||||
|
- a2a9ad74: fix: hermes update kills freshly-restarted gateway service
|
||||||
|
- 78697092: fix(cli): add missing subprocess.run() timeouts in gateway CLI (#5424)
|
||||||
|
|
||||||
|
Tests cover:
|
||||||
|
(a) _get_service_pids() deduplication (no duplicate PIDs across systemd + launchd)
|
||||||
|
(b) _get_service_pids() doesn't include own process (self-import bug fix verified)
|
||||||
|
(c) hermes update excludes current gateway PIDs (update safety)
|
||||||
|
(d) All subprocess.run() calls in hermes_cli/ have timeout= parameter
|
||||||
|
(e) launchd parsing handles malformed data gracefully
|
||||||
|
"""
|
||||||
|
import ast
|
||||||
|
import os
|
||||||
|
import platform
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import textwrap
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Resolve project root (parent of hermes_cli)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
HERMES_CLI = PROJECT_ROOT / "hermes_cli"
|
||||||
|
sys.path.insert(0, str(PROJECT_ROOT))
|
||||||
|
|
||||||
|
|
||||||
|
def _get_service_pids() -> set:
|
||||||
|
"""Reproduction of the _get_service_pids logic from commit d3d5b895.
|
||||||
|
|
||||||
|
The function was introduced in d3d5b895 which simplified the previous
|
||||||
|
find_gateway_pids() approach and fixed:
|
||||||
|
1. Deduplication across user+system systemd scopes
|
||||||
|
2. Self-import bug (importing from hermes_cli.gateway was wrong)
|
||||||
|
3. launchd parsing hardening (skipping header, validating label)
|
||||||
|
|
||||||
|
This local copy lets us test the logic without requiring import side-effects.
|
||||||
|
"""
|
||||||
|
pids: set = set()
|
||||||
|
|
||||||
|
# Platform detection (same as hermes_cli.gateway)
|
||||||
|
is_linux = sys.platform.startswith("linux")
|
||||||
|
is_macos = sys.platform == "darwin"
|
||||||
|
|
||||||
|
# Linux: check both user and system systemd scopes
|
||||||
|
if is_linux:
|
||||||
|
service_name = "hermes-gateway"
|
||||||
|
for scope in ("--user", ""):
|
||||||
|
cmd = ["systemctl"] + ([scope] if scope else []) + ["show", service_name, "--property=MainPID", "--value"]
|
||||||
|
try:
|
||||||
|
result = subprocess.run(cmd, capture_output=True, text=True, timeout=5)
|
||||||
|
if result.returncode == 0:
|
||||||
|
for line in result.stdout.splitlines():
|
||||||
|
line = line.strip()
|
||||||
|
if line.isdigit():
|
||||||
|
pid = int(line)
|
||||||
|
if pid > 0 and pid != os.getpid():
|
||||||
|
pids.add(pid)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# macOS: check launchd
|
||||||
|
if is_macos:
|
||||||
|
label = "ai.hermes.gateway"
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
["launchctl", "list"], capture_output=True, text=True, timeout=5,
|
||||||
|
)
|
||||||
|
for line in result.stdout.splitlines():
|
||||||
|
parts = line.strip().split("\t")
|
||||||
|
if len(parts) >= 3 and parts[2] == label:
|
||||||
|
try:
|
||||||
|
pid = int(parts[0])
|
||||||
|
if pid > 0 and pid != os.getpid():
|
||||||
|
pids.add(pid)
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return pids
|
||||||
|
|
||||||
|
|
||||||
|
# ===================================================================
|
||||||
|
# (a) PID Deduplication: systemd + launchd PIDs are deduplicated
|
||||||
|
# ===================================================================
|
||||||
|
class TestPIDDeduplication(unittest.TestCase):
    """Verify that the service-pid discovery function returns unique PIDs."""

    # NOTE: stacked @patch decorators apply bottom-up; sys.platform is
    # patched with a plain string (no mock injected), so each test method
    # receives exactly one injected argument: the subprocess.run mock.

    @patch("subprocess.run")
    @patch("sys.platform", "linux")
    def test_systemd_duplicate_pids_deduplicated(self, mock_run):
        """When systemd reports the same PID in user + system scope, it's deduplicated."""
        def fake_run(cmd, **kwargs):
            if "systemctl" in cmd:
                # Both scopes report the same PID
                return SimpleNamespace(returncode=0, stdout="12345\n")
            return SimpleNamespace(returncode=1, stdout="", stderr="")

        mock_run.side_effect = fake_run

        pids = _get_service_pids()
        self.assertIsInstance(pids, set)
        # Same PID in both scopes -> only one entry
        self.assertEqual(len(pids), 1, f"Expected 1 unique PID, got {pids}")
        self.assertIn(12345, pids)

    @patch("subprocess.run")
    @patch("sys.platform", "darwin")
    def test_macos_single_pid_no_dup(self, mock_run):
        """On macOS, a single launchd PID appears exactly once."""
        def fake_run(cmd, **kwargs):
            if cmd[0] == "launchctl":
                # Header row plus one matching service row, tab-separated.
                return SimpleNamespace(
                    returncode=0,
                    stdout="PID\tExitCode\tLabel\n12345\t0\tai.hermes.gateway\n",
                    stderr="",
                )
            return SimpleNamespace(returncode=1, stdout="", stderr="")

        mock_run.side_effect = fake_run

        pids = _get_service_pids()
        self.assertIsInstance(pids, set)
        self.assertEqual(len(pids), 1)
        self.assertIn(12345, pids)

    @patch("subprocess.run")
    @patch("sys.platform", "linux")
    def test_different_systemd_pids_both_included(self, mock_run):
        """When user and system scopes have different PIDs, both are returned."""
        # NOTE(review): `user_first` appears unused — the stub branches on
        # the presence of "--user" in the command instead.
        user_first = True

        def fake_run(cmd, **kwargs):
            nonlocal user_first
            if "systemctl" in cmd and "--user" in cmd:
                return SimpleNamespace(returncode=0, stdout="11111\n")
            if "systemctl" in cmd:
                return SimpleNamespace(returncode=0, stdout="22222\n")
            return SimpleNamespace(returncode=1, stdout="", stderr="")

        mock_run.side_effect = fake_run

        pids = _get_service_pids()
        self.assertEqual(len(pids), 2)
        self.assertIn(11111, pids)
        self.assertIn(22222, pids)
|
||||||
|
|
||||||
|
|
||||||
|
# ===================================================================
|
||||||
|
# (b) Self-Import Bug Fix: _get_service_pids() doesn't include own PID
|
||||||
|
# ===================================================================
|
||||||
|
class TestSelfImportFix(unittest.TestCase):
    """Verify that own PID is excluded (commit d3d5b895 fix)."""

    # Both tests feed the current process's PID through the stubbed
    # discovery backends and assert it never appears in the result.

    @patch("subprocess.run")
    @patch("sys.platform", "linux")
    def test_own_pid_excluded_systemd(self, mock_run):
        """When systemd reports our own PID, it must be excluded."""
        our_pid = os.getpid()

        def fake_run(cmd, **kwargs):
            if "systemctl" in cmd:
                return SimpleNamespace(returncode=0, stdout=f"{our_pid}\n")
            return SimpleNamespace(returncode=1, stdout="", stderr="")

        mock_run.side_effect = fake_run

        pids = _get_service_pids()
        self.assertNotIn(
            our_pid, pids,
            f"Service PIDs must not include our own PID ({our_pid})"
        )

    @patch("subprocess.run")
    @patch("sys.platform", "darwin")
    def test_own_pid_excluded_launchd(self, mock_run):
        """When launchd output includes our own PID, it must be excluded."""
        our_pid = os.getpid()
        label = "ai.hermes.gateway"

        def fake_run(cmd, **kwargs):
            if cmd[0] == "launchctl":
                # Single tab-separated launchd row with our own PID.
                return SimpleNamespace(
                    returncode=0,
                    stdout=f"{our_pid}\t0\t{label}\n",
                    stderr="",
                )
            return SimpleNamespace(returncode=1, stdout="", stderr="")

        mock_run.side_effect = fake_run

        pids = _get_service_pids()
        self.assertNotIn(our_pid, pids, "Service PIDs must not include our own PID")
|
||||||
|
|
||||||
|
|
||||||
|
# ===================================================================
|
||||||
|
# (c) Update Safety: hermes update excludes current gateway PIDs
|
||||||
|
# ===================================================================
|
||||||
|
class TestUpdateSafety(unittest.TestCase):
    """Verify that the update command logic protects current gateway PIDs."""

    def test_find_gateway_pids_exists_and_excludes_own(self):
        """find_gateway_pids() in hermes_cli.gateway excludes own PID."""
        from hermes_cli.gateway import find_gateway_pids
        self.assertTrue(callable(find_gateway_pids),
                        "find_gateway_pids must be callable")

        # The current implementation (d3d5b895) explicitly checks pid != os.getpid()
        import inspect

        import hermes_cli.gateway as gw
        impl_src = inspect.getsource(gw.find_gateway_pids)
        self.assertIn("os.getpid()", impl_src,
                      "find_gateway_pids should reference os.getpid() for self-exclusion")

    def test_wait_for_gateway_exit_exists(self):
        """The restart flow includes _wait_for_gateway_exit to avoid killing new process."""
        from hermes_cli.gateway import _wait_for_gateway_exit
        self.assertTrue(callable(_wait_for_gateway_exit),
                        "_wait_for_gateway_exit must exist to prevent race conditions")

    def test_kill_gateway_uses_find_gateway_pids(self):
        """kill_gateway_processes uses find_gateway_pids before killing."""
        import inspect

        from hermes_cli import gateway as gw
        impl_src = inspect.getsource(gw.kill_gateway_processes)
        self.assertIn("find_gateway_pids", impl_src,
                      "kill_gateway_processes must use find_gateway_pids")
|
||||||
|
|
||||||
|
|
||||||
|
# ===================================================================
|
||||||
|
# (d) All subprocess.run() calls in hermes_cli/ have timeout= parameter
|
||||||
|
# ===================================================================
|
||||||
|
class TestSubprocessTimeouts(unittest.TestCase):
|
||||||
|
"""Check subprocess.run() calls for timeout coverage.
|
||||||
|
|
||||||
|
Note: Some calls legitimately don't need a timeout (e.g., status display
|
||||||
|
commands where the user sees the output). This test identifies which ones
|
||||||
|
are missing so they can be triaged.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _collect_missing_timeouts(self):
|
||||||
|
"""Parse every .py file in hermes_cli/ and find subprocess.run() without timeout."""
|
||||||
|
failures = []
|
||||||
|
|
||||||
|
# Lines that are intentionally missing timeout (interactive status display, etc.)
|
||||||
|
# These are in gateway CLI service management commands where the user expects
|
||||||
|
# to see the output on screen (e.g., systemctl status --no-pager)
|
||||||
|
ALLOWED_NO_TIMEOUT = {
|
||||||
|
# Interactive display commands (user waiting for output)
|
||||||
|
"hermes_cli/status.py",
|
||||||
|
"hermes_cli/gateway.py",
|
||||||
|
"hermes_cli/uninstall.py",
|
||||||
|
"hermes_cli/doctor.py",
|
||||||
|
# Interactive subprocess calls
|
||||||
|
"hermes_cli/main.py",
|
||||||
|
"hermes_cli/tools_config.py",
|
||||||
|
}
|
||||||
|
|
||||||
|
for py_file in sorted(HERMES_CLI.rglob("*.py")):
|
||||||
|
try:
|
||||||
|
source = py_file.read_text(encoding="utf-8")
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if "subprocess.run" not in source:
|
||||||
|
continue
|
||||||
|
|
||||||
|
rel = str(py_file.relative_to(PROJECT_ROOT))
|
||||||
|
if rel in ALLOWED_NO_TIMEOUT:
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
tree = ast.parse(source, filename=str(py_file))
|
||||||
|
except SyntaxError:
|
||||||
|
failures.append(f"{rel}: SyntaxError in AST parse")
|
||||||
|
continue
|
||||||
|
|
||||||
|
for node in ast.walk(tree):
|
||||||
|
if not isinstance(node, ast.Call):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Detect subprocess.run(...)
|
||||||
|
func = node.func
|
||||||
|
is_subprocess_run = False
|
||||||
|
|
||||||
|
if isinstance(func, ast.Attribute) and func.attr == "run":
|
||||||
|
if isinstance(func.value, ast.Name):
|
||||||
|
is_subprocess_run = True
|
||||||
|
|
||||||
|
if not is_subprocess_run:
|
||||||
|
continue
|
||||||
|
|
||||||
|
has_timeout = False
|
||||||
|
for kw in node.keywords:
|
||||||
|
if kw.arg == "timeout":
|
||||||
|
has_timeout = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if not has_timeout:
|
||||||
|
failures.append(f"{rel}:{node.lineno}: subprocess.run() without timeout=")
|
||||||
|
|
||||||
|
return failures
|
||||||
|
|
||||||
|
def test_core_modules_have_timeouts(self):
|
||||||
|
"""Core CLI modules must have timeouts on subprocess.run() calls.
|
||||||
|
|
||||||
|
Files with legitimate interactive subprocess.run() calls (e.g., installers,
|
||||||
|
status displays) are excluded from this check.
|
||||||
|
"""
|
||||||
|
# Files where subprocess.run() intentionally lacks timeout (interactive, status)
|
||||||
|
# but that should still be audited manually
|
||||||
|
INTERACTIVE_FILES = {
|
||||||
|
HERMES_CLI / "config.py", # setup/installer - user waits
|
||||||
|
HERMES_CLI / "gateway.py", # service management - user sees output
|
||||||
|
HERMES_CLI / "uninstall.py", # uninstaller - user waits
|
||||||
|
HERMES_CLI / "doctor.py", # diagnostics - user sees output
|
||||||
|
HERMES_CLI / "status.py", # status display - user waits
|
||||||
|
HERMES_CLI / "main.py", # mixed interactive/CLI
|
||||||
|
HERMES_CLI / "setup.py", # setup wizard - user waits
|
||||||
|
HERMES_CLI / "tools_config.py", # config editor - user waits
|
||||||
|
}
|
||||||
|
|
||||||
|
missing = []
|
||||||
|
for py_file in sorted(HERMES_CLI.rglob("*.py")):
|
||||||
|
if py_file in INTERACTIVE_FILES:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
source = py_file.read_text(encoding="utf-8")
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
if "subprocess.run" not in source:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
tree = ast.parse(source, filename=str(py_file))
|
||||||
|
except SyntaxError:
|
||||||
|
missing.append(f"{py_file.relative_to(PROJECT_ROOT)}: SyntaxError")
|
||||||
|
continue
|
||||||
|
for node in ast.walk(tree):
|
||||||
|
if not isinstance(node, ast.Call):
|
||||||
|
continue
|
||||||
|
func = node.func
|
||||||
|
if isinstance(func, ast.Attribute) and func.attr == "run":
|
||||||
|
if isinstance(func.value, ast.Name):
|
||||||
|
has_timeout = any(kw.arg == "timeout" for kw in node.keywords)
|
||||||
|
if not has_timeout:
|
||||||
|
rel = py_file.relative_to(PROJECT_ROOT)
|
||||||
|
missing.append(f"{rel}:{node.lineno}: missing timeout=")
|
||||||
|
|
||||||
|
self.assertFalse(
|
||||||
|
missing,
|
||||||
|
f"subprocess.run() calls missing timeout= in non-interactive files:\n"
|
||||||
|
+ "\n".join(f" {m}" for m in missing)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ===================================================================
|
||||||
|
# (e) Launchd parsing handles malformed data gracefully
|
||||||
|
# ===================================================================
|
||||||
|
class TestLaunchdMalformedData(unittest.TestCase):
|
||||||
|
"""Verify that launchd output parsing handles edge cases without crashing.
|
||||||
|
|
||||||
|
The fix in d3d5b895 added:
|
||||||
|
- Header line detection (skip lines where parts[0] == "PID")
|
||||||
|
- Label matching (only accept if parts[2] == expected label)
|
||||||
|
- Graceful ValueError handling for non-numeric PIDs
|
||||||
|
- PID > 0 check
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _parse_launchd_label_test(self, stdout: str, label: str = "ai.hermes.gateway") -> set:
|
||||||
|
"""Reproduce the hardened launchd parsing logic."""
|
||||||
|
pids = set()
|
||||||
|
for line in stdout.splitlines():
|
||||||
|
parts = line.strip().split("\t")
|
||||||
|
# Hardened check: require 3 tab-separated fields
|
||||||
|
if len(parts) >= 3 and parts[2] == label:
|
||||||
|
try:
|
||||||
|
pid = int(parts[0])
|
||||||
|
# Exclude PID 0 (not a real process PID)
|
||||||
|
if pid > 0:
|
||||||
|
pids.add(pid)
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
return pids
|
||||||
|
|
||||||
|
def test_header_line_skipped(self):
|
||||||
|
"""Standard launchd header line should not produce a PID."""
|
||||||
|
result = self._parse_launchd_label_test("PID\tExitCode\tLabel\n")
|
||||||
|
self.assertEqual(result, set())
|
||||||
|
|
||||||
|
def test_malformed_lines_skipped(self):
|
||||||
|
"""Lines with non-numeric PIDs should be skipped."""
|
||||||
|
result = self._parse_launchd_label_test("abc\t0\tai.hermes.gateway\n")
|
||||||
|
self.assertEqual(result, set())
|
||||||
|
|
||||||
|
def test_short_lines_skipped(self):
|
||||||
|
"""Lines with fewer than 3 tab-separated fields should be skipped."""
|
||||||
|
result = self._parse_launchd_label_test("12345\n")
|
||||||
|
self.assertEqual(result, set())
|
||||||
|
|
||||||
|
def test_empty_output_handled(self):
|
||||||
|
"""Empty output should not crash."""
|
||||||
|
result = self._parse_launchd_label_test("")
|
||||||
|
self.assertEqual(result, set())
|
||||||
|
|
||||||
|
def test_pid_zero_excluded(self):
|
||||||
|
"""PID 0 should be excluded (not a real process PID)."""
|
||||||
|
result = self._parse_launchd_label_test("0\t0\tai.hermes.gateway\n")
|
||||||
|
self.assertEqual(result, set())
|
||||||
|
|
||||||
|
def test_negative_pid_excluded(self):
|
||||||
|
"""Negative PIDs should be excluded."""
|
||||||
|
result = self._parse_launchd_label_test("-1\t0\tai.hermes.gateway\n")
|
||||||
|
self.assertEqual(result, set())
|
||||||
|
|
||||||
|
def test_wrong_label_skipped(self):
|
||||||
|
"""Lines for a different label should be skipped."""
|
||||||
|
result = self._parse_launchd_label_test("12345\t0\tcom.other.service\n")
|
||||||
|
self.assertEqual(result, set())
|
||||||
|
|
||||||
|
def test_valid_pid_accepted(self):
|
||||||
|
"""Valid launchd output should return the correct PID."""
|
||||||
|
result = self._parse_launchd_label_test("12345\t0\tai.hermes.gateway\n")
|
||||||
|
self.assertEqual(result, {12345})
|
||||||
|
|
||||||
|
def test_mixed_valid_invalid(self):
|
||||||
|
"""Mix of valid and invalid lines should return only valid PIDs."""
|
||||||
|
output = textwrap.dedent("""\
|
||||||
|
PID\tExitCode\tLabel
|
||||||
|
abc\t0\tai.hermes.gateway
|
||||||
|
-1\t0\tai.hermes.gateway
|
||||||
|
54321\t0\tai.hermes.gateway
|
||||||
|
12345\t1\tai.hermes.gateway""")
|
||||||
|
result = self._parse_launchd_label_test(output)
|
||||||
|
self.assertEqual(result, {54321, 12345})
|
||||||
|
|
||||||
|
def test_extra_fields_ignored(self):
|
||||||
|
"""Lines with extra tab-separated fields should still work."""
|
||||||
|
result = self._parse_launchd_label_test("12345\t0\tai.hermes.gateway\textra\n")
|
||||||
|
self.assertEqual(result, {12345})
|
||||||
|
|
||||||
|
|
||||||
|
# ===================================================================
|
||||||
|
# (f) Git commit verification
|
||||||
|
# ===================================================================
|
||||||
|
class TestCommitVerification(unittest.TestCase):
|
||||||
|
"""Verify the expected commits are present in gitea/main."""
|
||||||
|
|
||||||
|
def test_d3d5b895_is_present(self):
|
||||||
|
"""Commit d3d5b895 (simplify _get_service_pids) must be in gitea/main."""
|
||||||
|
result = subprocess.run(
|
||||||
|
["git", "rev-parse", "--verify", "d3d5b895^{commit}"],
|
||||||
|
capture_output=True, text=True, timeout=10,
|
||||||
|
cwd=PROJECT_ROOT,
|
||||||
|
)
|
||||||
|
self.assertEqual(result.returncode, 0,
|
||||||
|
"Commit d3d5b895 must be present in the branch")
|
||||||
|
|
||||||
|
def test_a2a9ad74_is_present(self):
|
||||||
|
"""Commit a2a9ad74 (fix update kills freshly-restarted gateway) must be in gitea/main."""
|
||||||
|
result = subprocess.run(
|
||||||
|
["git", "rev-parse", "--verify", "a2a9ad74^{commit}"],
|
||||||
|
capture_output=True, text=True, timeout=10,
|
||||||
|
cwd=PROJECT_ROOT,
|
||||||
|
)
|
||||||
|
self.assertEqual(result.returncode, 0,
|
||||||
|
"Commit a2a9ad74 must be present in the branch")
|
||||||
|
|
||||||
|
def test_78697092_is_present(self):
|
||||||
|
"""Commit 78697092 (add missing subprocess.run() timeouts) must be in gitea/main."""
|
||||||
|
result = subprocess.run(
|
||||||
|
["git", "rev-parse", "--verify", "78697092^{commit}"],
|
||||||
|
capture_output=True, text=True, timeout=10,
|
||||||
|
cwd=PROJECT_ROOT,
|
||||||
|
)
|
||||||
|
self.assertEqual(result.returncode, 0,
|
||||||
|
"Commit 78697092 must be present in the branch")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main(verbosity=2)
|
||||||
106
skills/devops/wizard-council-automation/SKILL.md
Normal file
106
skills/devops/wizard-council-automation/SKILL.md
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
---
|
||||||
|
name: wizard-council-automation
|
||||||
|
description: Run wizard environment validation, skills drift audit, and cross-wizard dependency checks — the Wizard Council shared tooling suite
|
||||||
|
version: 1.0.0
|
||||||
|
metadata:
|
||||||
|
hermes:
|
||||||
|
tags: [devops, wizards, environment, audit, bootstrap]
|
||||||
|
related_skills: []
|
||||||
|
---
|
||||||
|
|
||||||
|
# Wizard Council Automation
|
||||||
|
|
||||||
|
This skill gives you access to the shared forge tooling for environment
|
||||||
|
validation, skill drift detection, and cross-wizard dependency checking.
|
||||||
|
|
||||||
|
## Tools
|
||||||
|
|
||||||
|
All tools live in `wizard-bootstrap/` in the hermes-agent repo root.
|
||||||
|
|
||||||
|
### 1. Environment Bootstrap (`wizard_bootstrap.py`)
|
||||||
|
|
||||||
|
Validates the full wizard environment in one command:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python wizard-bootstrap/wizard_bootstrap.py
|
||||||
|
python wizard-bootstrap/wizard_bootstrap.py --json
|
||||||
|
```
|
||||||
|
|
||||||
|
Checks:
|
||||||
|
- Python version (>=3.11)
|
||||||
|
- Core dependency imports
|
||||||
|
- hermes_constants smoke test
|
||||||
|
- HERMES_HOME existence and writability
|
||||||
|
- LLM provider API key
|
||||||
|
- Gitea authentication (GITEA_TOKEN / FORGE_TOKEN)
|
||||||
|
- Telegram bot connectivity (TELEGRAM_BOT_TOKEN)
|
||||||
|
|
||||||
|
Exits 0 if all checks pass, 1 if any fail.
|
||||||
|
|
||||||
|
### 2. Skills Drift Audit (`skills_audit.py`)
|
||||||
|
|
||||||
|
Compares repo-bundled skills against installed skills:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python wizard-bootstrap/skills_audit.py # detect drift
|
||||||
|
python wizard-bootstrap/skills_audit.py --fix # sync missing/outdated
|
||||||
|
python wizard-bootstrap/skills_audit.py --diff # show diffs for outdated
|
||||||
|
python wizard-bootstrap/skills_audit.py --json # machine-readable output
|
||||||
|
```
|
||||||
|
|
||||||
|
Reports: MISSING, EXTRA, OUTDATED, OK.
|
||||||
|
|
||||||
|
### 3. Dependency Checker (`dependency_checker.py`)
|
||||||
|
|
||||||
|
Validates binary and env-var dependencies declared in SKILL.md frontmatter:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python wizard-bootstrap/dependency_checker.py
|
||||||
|
python wizard-bootstrap/dependency_checker.py --skill devops/my-skill
|
||||||
|
```
|
||||||
|
|
||||||
|
Skills declare deps in their frontmatter:
|
||||||
|
```yaml
|
||||||
|
dependencies:
|
||||||
|
binaries: [ffmpeg, imagemagick]
|
||||||
|
env_vars: [MY_API_KEY]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Monthly Audit (`monthly_audit.py`)
|
||||||
|
|
||||||
|
Runs all three checks and generates a Markdown report:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python wizard-bootstrap/monthly_audit.py
|
||||||
|
python wizard-bootstrap/monthly_audit.py --post-telegram
|
||||||
|
```
|
||||||
|
|
||||||
|
Report saved to `~/.hermes/wizard-council/audit-YYYY-MM.md`.
|
||||||
|
|
||||||
|
## Wizard Environment Contract
|
||||||
|
|
||||||
|
See `wizard-bootstrap/WIZARD_ENVIRONMENT_CONTRACT.md` for the full
|
||||||
|
specification of what every forge wizard must maintain.
|
||||||
|
|
||||||
|
## Workflow
|
||||||
|
|
||||||
|
### New Wizard Onboarding
|
||||||
|
|
||||||
|
1. Clone the hermes-agent repo
|
||||||
|
2. Install dependencies: `uv pip install -r requirements.txt`
|
||||||
|
3. Run: `python wizard-bootstrap/wizard_bootstrap.py`
|
||||||
|
4. Resolve all failures
|
||||||
|
5. Go online
|
||||||
|
|
||||||
|
### Ongoing Maintenance
|
||||||
|
|
||||||
|
1. Monthly audit fires automatically via cron
|
||||||
|
2. Report posted to wizard-council-automation channel
|
||||||
|
3. Wizards resolve any drift before next audit
|
||||||
|
|
||||||
|
### When Drift Is Detected
|
||||||
|
|
||||||
|
1. Run `python wizard-bootstrap/skills_audit.py` to identify drift
|
||||||
|
2. Run `python wizard-bootstrap/skills_audit.py --fix` to sync
|
||||||
|
3. Run `python wizard-bootstrap/dependency_checker.py` to check deps
|
||||||
|
4. Update SKILL.md frontmatter with any new binary/env_var requirements
|
||||||
@@ -1,177 +0,0 @@
|
|||||||
"""
|
|
||||||
Compliance tests for M1: The Stop Protocol.
|
|
||||||
|
|
||||||
Verifies 100% stop detection, ACK logging, and hands-off registry behavior.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import tempfile
|
|
||||||
from datetime import datetime, timedelta, timezone
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from agent.stop_protocol import StopProtocol
|
|
||||||
|
|
||||||
|
|
||||||
class TestStopDetection:
|
|
||||||
"""100% compliance: every explicit stop/halt command must be detected."""
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"text",
|
|
||||||
[
|
|
||||||
"Stop",
|
|
||||||
"stop",
|
|
||||||
"STOP",
|
|
||||||
"Stop.",
|
|
||||||
"Halt",
|
|
||||||
"halt!",
|
|
||||||
"Stop means stop",
|
|
||||||
"Stop means stop.",
|
|
||||||
"Halt means halt",
|
|
||||||
"Stop all work",
|
|
||||||
"Halt everything",
|
|
||||||
"Stop immediately",
|
|
||||||
"Stop now",
|
|
||||||
" stop ",
|
|
||||||
"[SYSTEM: Stop]",
|
|
||||||
"[SYSTEM: you must Stop immediately]",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
def test_detects_stop_commands(self, text: str):
|
|
||||||
sp = StopProtocol()
|
|
||||||
assert sp.is_stop_command(text) is True
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"text",
|
|
||||||
[
|
|
||||||
"Please stop by the store",
|
|
||||||
"I stopped earlier",
|
|
||||||
"The bus stop is nearby",
|
|
||||||
"Can you help me halt and catch fire? No, that's not a command",
|
|
||||||
"What does stop mean?",
|
|
||||||
"don't stop believing",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
def test_ignores_non_command_uses(self, text: str):
|
|
||||||
sp = StopProtocol()
|
|
||||||
assert sp.is_stop_command(text) is False
|
|
||||||
|
|
||||||
def test_check_messages_detects_last_user_message(self):
|
|
||||||
sp = StopProtocol()
|
|
||||||
messages = [
|
|
||||||
{"role": "system", "content": "You are helpful."},
|
|
||||||
{"role": "user", "content": "Do something."},
|
|
||||||
{"role": "assistant", "content": "Okay."},
|
|
||||||
{"role": "user", "content": "Stop"},
|
|
||||||
]
|
|
||||||
assert sp.check_messages(messages) is True
|
|
||||||
|
|
||||||
def test_check_messages_ignores_old_user_messages(self):
|
|
||||||
sp = StopProtocol()
|
|
||||||
messages = [
|
|
||||||
{"role": "user", "content": "Stop"},
|
|
||||||
{"role": "assistant", "content": "Okay."},
|
|
||||||
{"role": "user", "content": "Actually continue."},
|
|
||||||
]
|
|
||||||
assert sp.check_messages(messages) is False
|
|
||||||
|
|
||||||
def test_empty_messages_safe(self):
|
|
||||||
sp = StopProtocol()
|
|
||||||
assert sp.check_messages([]) is False
|
|
||||||
|
|
||||||
|
|
||||||
class TestHandsOffRegistry:
|
|
||||||
def test_adds_and_checks_global_hands_off(self):
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
state_path = os.path.join(tmp, "state.json")
|
|
||||||
log_path = os.path.join(tmp, "allegro.log")
|
|
||||||
sp = StopProtocol(cycle_state_path=state_path, log_path=log_path)
|
|
||||||
|
|
||||||
assert sp.is_hands_off() is False
|
|
||||||
sp.add_hands_off(duration_hours=1)
|
|
||||||
assert sp.is_hands_off() is True
|
|
||||||
|
|
||||||
def test_expired_hands_off_returns_false(self):
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
state_path = os.path.join(tmp, "state.json")
|
|
||||||
log_path = os.path.join(tmp, "allegro.log")
|
|
||||||
sp = StopProtocol(cycle_state_path=state_path, log_path=log_path)
|
|
||||||
|
|
||||||
# Manually write an expired entry
|
|
||||||
past = datetime.now(timezone.utc) - timedelta(hours=1)
|
|
||||||
with open(state_path, "w") as f:
|
|
||||||
json.dump({"hands_off_registry": {"global": past.isoformat()}}, f)
|
|
||||||
|
|
||||||
assert sp.is_hands_off() is False
|
|
||||||
|
|
||||||
def test_target_specific_hands_off(self):
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
state_path = os.path.join(tmp, "state.json")
|
|
||||||
log_path = os.path.join(tmp, "allegro.log")
|
|
||||||
sp = StopProtocol(cycle_state_path=state_path, log_path=log_path)
|
|
||||||
|
|
||||||
sp.add_hands_off(target="ezra-config", duration_hours=1)
|
|
||||||
assert sp.is_hands_off("ezra-config") is True
|
|
||||||
assert sp.is_hands_off("other-system") is False
|
|
||||||
assert sp.is_hands_off() is False # global not set
|
|
||||||
|
|
||||||
def test_global_false_when_only_target_set(self):
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
state_path = os.path.join(tmp, "state.json")
|
|
||||||
log_path = os.path.join(tmp, "allegro.log")
|
|
||||||
sp = StopProtocol(cycle_state_path=state_path, log_path=log_path)
|
|
||||||
|
|
||||||
sp.add_hands_off(target="ezra-config", duration_hours=1)
|
|
||||||
assert sp.is_hands_off() is False # global not set
|
|
||||||
|
|
||||||
|
|
||||||
class TestStopAckLogging:
|
|
||||||
def test_log_stop_ack_creates_file(self):
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
state_path = os.path.join(tmp, "state.json")
|
|
||||||
log_path = os.path.join(tmp, "allegro.log")
|
|
||||||
sp = StopProtocol(cycle_state_path=state_path, log_path=log_path)
|
|
||||||
|
|
||||||
sp.log_stop_ack("test-context")
|
|
||||||
assert os.path.exists(log_path)
|
|
||||||
with open(log_path, "r") as f:
|
|
||||||
content = f.read()
|
|
||||||
assert "STOP_ACK" in content
|
|
||||||
assert "test-context" in content
|
|
||||||
|
|
||||||
|
|
||||||
class TestEnforceIntegration:
|
|
||||||
def test_enforce_returns_true_and_logs(self):
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
state_path = os.path.join(tmp, "state.json")
|
|
||||||
log_path = os.path.join(tmp, "allegro.log")
|
|
||||||
sp = StopProtocol(cycle_state_path=state_path, log_path=log_path)
|
|
||||||
|
|
||||||
messages = [{"role": "user", "content": "Stop"}]
|
|
||||||
result = sp.enforce(messages)
|
|
||||||
|
|
||||||
assert result is True
|
|
||||||
assert sp.is_hands_off() is True
|
|
||||||
assert os.path.exists(log_path)
|
|
||||||
with open(log_path, "r") as f:
|
|
||||||
assert "STOP_ACK" in f.read()
|
|
||||||
|
|
||||||
def test_enforce_returns_false_when_no_stop(self):
|
|
||||||
with tempfile.TemporaryDirectory() as tmp:
|
|
||||||
state_path = os.path.join(tmp, "state.json")
|
|
||||||
log_path = os.path.join(tmp, "allegro.log")
|
|
||||||
sp = StopProtocol(cycle_state_path=state_path, log_path=log_path)
|
|
||||||
|
|
||||||
messages = [{"role": "user", "content": "Keep going"}]
|
|
||||||
result = sp.enforce(messages)
|
|
||||||
|
|
||||||
assert result is False
|
|
||||||
assert not os.path.exists(log_path)
|
|
||||||
|
|
||||||
def test_build_cancelled_result(self):
|
|
||||||
result = StopProtocol.build_cancelled_result("terminal")
|
|
||||||
data = json.loads(result)
|
|
||||||
assert data["success"] is False
|
|
||||||
assert "STOP_ACK" in data["error"]
|
|
||||||
assert "terminal" in data["error"]
|
|
||||||
455
tests/test_observatory.py
Normal file
455
tests/test_observatory.py
Normal file
@@ -0,0 +1,455 @@
|
|||||||
|
"""
|
||||||
|
Tests for observatory.py — health monitoring & alerting.
|
||||||
|
|
||||||
|
Refs #147
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
PROJECT_ROOT = Path(__file__).parent.parent
|
||||||
|
if str(PROJECT_ROOT) not in sys.path:
|
||||||
|
sys.path.insert(0, str(PROJECT_ROOT))
|
||||||
|
|
||||||
|
import observatory as obs
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Fixtures
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def cfg(tmp_path):
|
||||||
|
"""Return an ObservatoryConfig pointing at a temp directory."""
|
||||||
|
cfg = obs.ObservatoryConfig()
|
||||||
|
cfg.db_path = tmp_path / "observatory.db"
|
||||||
|
cfg.alert_chat_id = "99999"
|
||||||
|
cfg.digest_chat_id = "99999"
|
||||||
|
cfg.telegram_token = "fake-token"
|
||||||
|
cfg.webhook_url = "http://127.0.0.1:19999/health" # port never bound
|
||||||
|
cfg.api_url = "http://127.0.0.1:19998/health"
|
||||||
|
return cfg
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Config tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestObservatoryConfig:
|
||||||
|
def test_defaults(self):
|
||||||
|
c = obs.ObservatoryConfig()
|
||||||
|
assert c.disk_warn_pct == 80.0
|
||||||
|
assert c.disk_crit_pct == 90.0
|
||||||
|
assert c.mem_warn_pct == 80.0
|
||||||
|
assert c.mem_crit_pct == 90.0
|
||||||
|
assert c.cpu_warn_pct == 80.0
|
||||||
|
assert c.cpu_crit_pct == 95.0
|
||||||
|
assert c.poll_interval == 60
|
||||||
|
assert c.webhook_latency_slo_ms == 2000.0
|
||||||
|
assert c.gateway_uptime_slo_pct == 99.5
|
||||||
|
|
||||||
|
def test_from_env_overrides(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("OBSERVATORY_DISK_WARN_PCT", "70")
|
||||||
|
monkeypatch.setenv("OBSERVATORY_POLL_INTERVAL", "30")
|
||||||
|
monkeypatch.setenv("OBSERVATORY_ALERT_CHAT_ID", "12345")
|
||||||
|
monkeypatch.setenv("TELEGRAM_BOT_TOKEN", "tok123")
|
||||||
|
c = obs.ObservatoryConfig.from_env()
|
||||||
|
assert c.disk_warn_pct == 70.0
|
||||||
|
assert c.poll_interval == 30
|
||||||
|
assert c.alert_chat_id == "12345"
|
||||||
|
assert c.telegram_token == "tok123"
|
||||||
|
|
||||||
|
def test_digest_chat_falls_back_to_alert(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("OBSERVATORY_ALERT_CHAT_ID", "abc")
|
||||||
|
monkeypatch.delenv("OBSERVATORY_DIGEST_CHAT_ID", raising=False)
|
||||||
|
c = obs.ObservatoryConfig.from_env()
|
||||||
|
assert c.digest_chat_id == "abc"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CheckResult / HealthSnapshot tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestHealthSnapshot:
|
||||||
|
def _make_snapshot(self, statuses):
|
||||||
|
checks = [obs.CheckResult(name=f"c{i}", status=s, message="") for i, s in enumerate(statuses)]
|
||||||
|
return obs.HealthSnapshot(ts="2026-01-01T00:00:00+00:00", checks=checks)
|
||||||
|
|
||||||
|
def test_overall_ok(self):
|
||||||
|
snap = self._make_snapshot(["ok", "ok"])
|
||||||
|
assert snap.overall_status == "ok"
|
||||||
|
|
||||||
|
def test_overall_warn(self):
|
||||||
|
snap = self._make_snapshot(["ok", "warn"])
|
||||||
|
assert snap.overall_status == "warn"
|
||||||
|
|
||||||
|
def test_overall_critical(self):
|
||||||
|
snap = self._make_snapshot(["ok", "warn", "critical"])
|
||||||
|
assert snap.overall_status == "critical"
|
||||||
|
|
||||||
|
def test_overall_error(self):
|
||||||
|
snap = self._make_snapshot(["ok", "error"])
|
||||||
|
assert snap.overall_status == "critical"
|
||||||
|
|
||||||
|
def test_to_dict(self):
|
||||||
|
snap = self._make_snapshot(["ok"])
|
||||||
|
d = snap.to_dict()
|
||||||
|
assert d["overall"] == "ok"
|
||||||
|
assert isinstance(d["checks"], list)
|
||||||
|
assert d["checks"][0]["name"] == "c0"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Individual check tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestCheckGatewayLiveness:
|
||||||
|
def test_running(self):
|
||||||
|
with patch("gateway.status.is_gateway_running", return_value=True), \
|
||||||
|
patch("gateway.status.get_running_pid", return_value=12345):
|
||||||
|
result = obs.check_gateway_liveness()
|
||||||
|
assert result.status == "ok"
|
||||||
|
assert "12345" in result.message
|
||||||
|
|
||||||
|
def test_not_running(self):
|
||||||
|
with patch("gateway.status.is_gateway_running", return_value=False), \
|
||||||
|
patch("gateway.status.get_running_pid", return_value=None):
|
||||||
|
result = obs.check_gateway_liveness()
|
||||||
|
assert result.status == "critical"
|
||||||
|
|
||||||
|
def test_import_error(self):
|
||||||
|
import builtins
|
||||||
|
real_import = builtins.__import__
|
||||||
|
|
||||||
|
def mock_import(name, *args, **kwargs):
|
||||||
|
if name == "gateway.status":
|
||||||
|
raise ImportError("no module")
|
||||||
|
return real_import(name, *args, **kwargs)
|
||||||
|
|
||||||
|
with patch("builtins.__import__", side_effect=mock_import):
|
||||||
|
result = obs.check_gateway_liveness()
|
||||||
|
assert result.status in ("error", "critical", "ok") # graceful
|
||||||
|
|
||||||
|
|
||||||
|
class TestCheckDisk:
|
||||||
|
def test_ok(self, cfg):
|
||||||
|
mock_usage = MagicMock()
|
||||||
|
mock_usage.percent = 50.0
|
||||||
|
mock_usage.free = 10 * 1024 ** 3
|
||||||
|
mock_usage.total = 20 * 1024 ** 3
|
||||||
|
with patch("psutil.disk_usage", return_value=mock_usage):
|
||||||
|
result = obs.check_disk(cfg)
|
||||||
|
assert result.status == "ok"
|
||||||
|
assert result.value == 50.0
|
||||||
|
|
||||||
|
def test_warn(self, cfg):
|
||||||
|
mock_usage = MagicMock()
|
||||||
|
mock_usage.percent = 85.0
|
||||||
|
mock_usage.free = 3 * 1024 ** 3
|
||||||
|
mock_usage.total = 20 * 1024 ** 3
|
||||||
|
with patch("psutil.disk_usage", return_value=mock_usage):
|
||||||
|
result = obs.check_disk(cfg)
|
||||||
|
assert result.status == "warn"
|
||||||
|
|
||||||
|
def test_critical(self, cfg):
|
||||||
|
mock_usage = MagicMock()
|
||||||
|
mock_usage.percent = 92.0
|
||||||
|
mock_usage.free = 1 * 1024 ** 3
|
||||||
|
mock_usage.total = 20 * 1024 ** 3
|
||||||
|
with patch("psutil.disk_usage", return_value=mock_usage):
|
||||||
|
result = obs.check_disk(cfg)
|
||||||
|
assert result.status == "critical"
|
||||||
|
|
||||||
|
def test_no_psutil(self, cfg, monkeypatch):
|
||||||
|
monkeypatch.setattr(obs, "_PSUTIL", False)
|
||||||
|
result = obs.check_disk(cfg)
|
||||||
|
assert result.status == "error"
|
||||||
|
|
||||||
|
|
||||||
|
class TestCheckMemory:
|
||||||
|
def test_ok(self, cfg):
|
||||||
|
mock_mem = MagicMock()
|
||||||
|
mock_mem.percent = 60.0
|
||||||
|
mock_mem.available = 4 * 1024 ** 3
|
||||||
|
mock_mem.total = 16 * 1024 ** 3
|
||||||
|
with patch("psutil.virtual_memory", return_value=mock_mem):
|
||||||
|
result = obs.check_memory(cfg)
|
||||||
|
assert result.status == "ok"
|
||||||
|
|
||||||
|
def test_critical(self, cfg):
|
||||||
|
mock_mem = MagicMock()
|
||||||
|
mock_mem.percent = 95.0
|
||||||
|
mock_mem.available = 512 * 1024 ** 2
|
||||||
|
mock_mem.total = 16 * 1024 ** 3
|
||||||
|
with patch("psutil.virtual_memory", return_value=mock_mem):
|
||||||
|
result = obs.check_memory(cfg)
|
||||||
|
assert result.status == "critical"
|
||||||
|
|
||||||
|
|
||||||
|
class TestCheckCPU:
|
||||||
|
def test_ok(self, cfg):
|
||||||
|
with patch("psutil.cpu_percent", return_value=40.0):
|
||||||
|
result = obs.check_cpu(cfg)
|
||||||
|
assert result.status == "ok"
|
||||||
|
|
||||||
|
def test_warn(self, cfg):
|
||||||
|
with patch("psutil.cpu_percent", return_value=85.0):
|
||||||
|
result = obs.check_cpu(cfg)
|
||||||
|
assert result.status == "warn"
|
||||||
|
|
||||||
|
def test_critical(self, cfg):
|
||||||
|
with patch("psutil.cpu_percent", return_value=98.0):
|
||||||
|
result = obs.check_cpu(cfg)
|
||||||
|
assert result.status == "critical"
|
||||||
|
|
||||||
|
|
||||||
|
class TestCheckDatabase:
|
||||||
|
def test_ok(self, cfg):
|
||||||
|
obs._init_db(cfg.db_path)
|
||||||
|
result = obs.check_database(cfg)
|
||||||
|
assert result.status == "ok"
|
||||||
|
|
||||||
|
def test_not_yet_created(self, cfg):
|
||||||
|
# db_path does not exist
|
||||||
|
result = obs.check_database(cfg)
|
||||||
|
assert result.status == "warn"
|
||||||
|
|
||||||
|
|
||||||
|
class TestCheckHTTP:
|
||||||
|
def test_webhook_connection_refused(self, cfg):
|
||||||
|
result = obs.check_webhook_http(cfg)
|
||||||
|
# Port 19999 is not bound — should get a "not reachable" warn
|
||||||
|
assert result.status in ("warn", "error")
|
||||||
|
|
||||||
|
def test_api_server_connection_refused(self, cfg):
|
||||||
|
result = obs.check_api_server_http(cfg)
|
||||||
|
assert result.status in ("warn", "error")
|
||||||
|
|
||||||
|
def test_webhook_ok(self, cfg):
|
||||||
|
import urllib.error
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
mock_resp = MagicMock()
|
||||||
|
mock_resp.__enter__ = lambda s: s
|
||||||
|
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||||
|
mock_resp.status = 200
|
||||||
|
mock_resp.read.return_value = b'{"status":"ok"}'
|
||||||
|
with patch("urllib.request.urlopen", return_value=mock_resp):
|
||||||
|
result = obs.check_webhook_http(cfg)
|
||||||
|
assert result.status in ("ok", "warn")
|
||||||
|
|
||||||
|
def test_webhook_http_error(self, cfg):
|
||||||
|
mock_resp = MagicMock()
|
||||||
|
mock_resp.__enter__ = lambda s: s
|
||||||
|
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||||
|
mock_resp.status = 503
|
||||||
|
with patch("urllib.request.urlopen", return_value=mock_resp):
|
||||||
|
result = obs.check_webhook_http(cfg)
|
||||||
|
assert result.status == "critical"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Persistence tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestPersistence:
|
||||||
|
def test_store_and_load(self, cfg):
|
||||||
|
obs._init_db(cfg.db_path)
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
ts = datetime.now(timezone.utc).isoformat()
|
||||||
|
snap = obs.HealthSnapshot(
|
||||||
|
ts=ts,
|
||||||
|
checks=[obs.CheckResult(name="test", status="ok", message="fine")],
|
||||||
|
)
|
||||||
|
obs.store_snapshot(cfg, snap)
|
||||||
|
loaded = obs.load_snapshots(cfg, days=30)
|
||||||
|
assert len(loaded) == 1
|
||||||
|
assert loaded[0]["overall"] == "ok"
|
||||||
|
|
||||||
|
def test_retention_pruning(self, cfg):
|
||||||
|
obs._init_db(cfg.db_path)
|
||||||
|
# Insert an old record directly
|
||||||
|
with obs._db(cfg.db_path) as conn:
|
||||||
|
conn.execute(
|
||||||
|
"INSERT INTO health_snapshots (ts, overall, payload) VALUES (?, ?, ?)",
|
||||||
|
("2000-01-01T00:00:00+00:00", "ok", '{"ts":"2000-01-01T00:00:00+00:00","overall":"ok","checks":[]}'),
|
||||||
|
)
|
||||||
|
snap = obs.HealthSnapshot(
|
||||||
|
ts="2026-01-01T00:00:00+00:00",
|
||||||
|
checks=[],
|
||||||
|
)
|
||||||
|
obs.store_snapshot(cfg, snap)
|
||||||
|
# Old record should have been pruned
|
||||||
|
with obs._db(cfg.db_path) as conn:
|
||||||
|
count = conn.execute("SELECT count(*) FROM health_snapshots WHERE ts < '2001-01-01'").fetchone()[0]
|
||||||
|
assert count == 0
|
||||||
|
|
||||||
|
def test_record_alert_sent(self, cfg):
|
||||||
|
obs._init_db(cfg.db_path)
|
||||||
|
obs.record_alert_sent(cfg, "gateway_process", "critical", "not running")
|
||||||
|
with obs._db(cfg.db_path) as conn:
|
||||||
|
count = conn.execute("SELECT count(*) FROM alerts_sent").fetchone()[0]
|
||||||
|
assert count == 1
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Alerting tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestAlerting:
|
||||||
|
def _snap(self, status):
|
||||||
|
return obs.HealthSnapshot(
|
||||||
|
ts="2026-01-01T00:00:00+00:00",
|
||||||
|
checks=[obs.CheckResult(name="gateway_process", status=status, message="test")],
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_no_alert_when_ok(self, cfg):
|
||||||
|
snap = self._snap("ok")
|
||||||
|
prev = self._snap("ok")
|
||||||
|
obs._init_db(cfg.db_path)
|
||||||
|
with patch("observatory._telegram_send", return_value=True) as mock_send:
|
||||||
|
alerts = obs.maybe_alert(cfg, snap, prev)
|
||||||
|
mock_send.assert_not_called()
|
||||||
|
assert alerts == []
|
||||||
|
|
||||||
|
def test_alert_on_new_critical(self, cfg):
|
||||||
|
snap = self._snap("critical")
|
||||||
|
prev = self._snap("ok")
|
||||||
|
obs._init_db(cfg.db_path)
|
||||||
|
with patch("observatory._telegram_send", return_value=True) as mock_send:
|
||||||
|
alerts = obs.maybe_alert(cfg, snap, prev)
|
||||||
|
mock_send.assert_called_once()
|
||||||
|
assert len(alerts) == 1
|
||||||
|
|
||||||
|
def test_no_duplicate_alert(self, cfg):
|
||||||
|
snap = self._snap("critical")
|
||||||
|
prev = self._snap("critical") # already critical
|
||||||
|
obs._init_db(cfg.db_path)
|
||||||
|
with patch("observatory._telegram_send", return_value=True) as mock_send:
|
||||||
|
alerts = obs.maybe_alert(cfg, snap, prev)
|
||||||
|
mock_send.assert_not_called()
|
||||||
|
assert alerts == []
|
||||||
|
|
||||||
|
def test_recovery_alert(self, cfg):
|
||||||
|
snap = self._snap("ok")
|
||||||
|
prev = self._snap("critical")
|
||||||
|
obs._init_db(cfg.db_path)
|
||||||
|
with patch("observatory._telegram_send", return_value=True) as mock_send:
|
||||||
|
alerts = obs.maybe_alert(cfg, snap, prev)
|
||||||
|
mock_send.assert_called_once()
|
||||||
|
|
||||||
|
def test_no_alert_without_token(self, cfg):
|
||||||
|
cfg.telegram_token = None
|
||||||
|
snap = self._snap("critical")
|
||||||
|
obs._init_db(cfg.db_path)
|
||||||
|
alerts = obs.maybe_alert(cfg, snap, None)
|
||||||
|
assert alerts == []
|
||||||
|
|
||||||
|
def test_no_alert_without_chat_id(self, cfg):
|
||||||
|
cfg.alert_chat_id = None
|
||||||
|
snap = self._snap("critical")
|
||||||
|
obs._init_db(cfg.db_path)
|
||||||
|
alerts = obs.maybe_alert(cfg, snap, None)
|
||||||
|
assert alerts == []
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Digest tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestDigest:
|
||||||
|
def test_empty_digest(self, cfg):
|
||||||
|
obs._init_db(cfg.db_path)
|
||||||
|
digest = obs.build_digest(cfg)
|
||||||
|
assert "no health data" in digest.lower() or "24 hours" in digest.lower()
|
||||||
|
|
||||||
|
def test_digest_with_data(self, cfg):
|
||||||
|
obs._init_db(cfg.db_path)
|
||||||
|
from datetime import datetime, timezone, timedelta
|
||||||
|
ts = (datetime.now(timezone.utc) - timedelta(hours=1)).isoformat()
|
||||||
|
snap = obs.HealthSnapshot(
|
||||||
|
ts=ts,
|
||||||
|
checks=[
|
||||||
|
obs.CheckResult(name="gateway_process", status="ok", message="running"),
|
||||||
|
obs.CheckResult(name="disk", status="ok", message="50% used", value=50.0, unit="%"),
|
||||||
|
obs.CheckResult(name="webhook_http", status="ok", message="ok", value=150.0, unit="ms"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
obs.store_snapshot(cfg, snap)
|
||||||
|
digest = obs.build_digest(cfg)
|
||||||
|
assert "Daily Digest" in digest
|
||||||
|
assert "Gateway" in digest or "gateway" in digest
|
||||||
|
|
||||||
|
def test_send_digest_no_token(self, cfg):
|
||||||
|
cfg.telegram_token = None
|
||||||
|
obs._init_db(cfg.db_path)
|
||||||
|
result = obs.send_digest(cfg)
|
||||||
|
assert result is False
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# SLO tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestSLO:
|
||||||
|
def test_slo_definitions_complete(self):
|
||||||
|
assert "gateway_uptime_pct" in obs.SLO_DEFINITIONS
|
||||||
|
assert "webhook_latency_ms" in obs.SLO_DEFINITIONS
|
||||||
|
assert "api_server_latency_ms" in obs.SLO_DEFINITIONS
|
||||||
|
|
||||||
|
def test_slo_targets(self):
|
||||||
|
assert obs.SLO_DEFINITIONS["gateway_uptime_pct"]["target"] == 99.5
|
||||||
|
assert obs.SLO_DEFINITIONS["webhook_latency_ms"]["target"] == 2000
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestCLI:
|
||||||
|
def test_check_exits_0_on_ok(self, cfg, monkeypatch, tmp_path):
|
||||||
|
monkeypatch.setenv("OBSERVATORY_DB_PATH", str(tmp_path / "obs.db"))
|
||||||
|
ok_snap = obs.HealthSnapshot(
|
||||||
|
ts="2026-01-01T00:00:00+00:00",
|
||||||
|
checks=[obs.CheckResult(name="all_good", status="ok", message="fine")],
|
||||||
|
)
|
||||||
|
with patch("observatory.collect_snapshot", return_value=ok_snap), \
|
||||||
|
patch("observatory.store_snapshot"):
|
||||||
|
rc = obs.main(["--check"])
|
||||||
|
assert rc == 0
|
||||||
|
|
||||||
|
def test_check_exits_nonzero_on_critical(self, cfg, monkeypatch, tmp_path):
|
||||||
|
monkeypatch.setenv("OBSERVATORY_DB_PATH", str(tmp_path / "obs.db"))
|
||||||
|
bad_snap = obs.HealthSnapshot(
|
||||||
|
ts="2026-01-01T00:00:00+00:00",
|
||||||
|
checks=[obs.CheckResult(name="gateway_process", status="critical", message="down")],
|
||||||
|
)
|
||||||
|
with patch("observatory.collect_snapshot", return_value=bad_snap), \
|
||||||
|
patch("observatory.store_snapshot"):
|
||||||
|
rc = obs.main(["--check"])
|
||||||
|
assert rc != 0
|
||||||
|
|
||||||
|
def test_digest_flag(self, monkeypatch, tmp_path):
|
||||||
|
monkeypatch.setenv("OBSERVATORY_DB_PATH", str(tmp_path / "obs.db"))
|
||||||
|
rc = obs.main(["--digest"])
|
||||||
|
assert rc == 0
|
||||||
|
|
||||||
|
def test_slo_flag(self, monkeypatch, tmp_path):
|
||||||
|
monkeypatch.setenv("OBSERVATORY_DB_PATH", str(tmp_path / "obs.db"))
|
||||||
|
rc = obs.main(["--slo"])
|
||||||
|
assert rc == 0
|
||||||
|
|
||||||
|
def test_history_flag(self, monkeypatch, tmp_path):
|
||||||
|
monkeypatch.setenv("OBSERVATORY_DB_PATH", str(tmp_path / "obs.db"))
|
||||||
|
rc = obs.main(["--history", "5"])
|
||||||
|
assert rc == 0
|
||||||
242
tests/test_wizard_bootstrap.py
Normal file
242
tests/test_wizard_bootstrap.py
Normal file
@@ -0,0 +1,242 @@
|
|||||||
|
"""
|
||||||
|
Tests for wizard-bootstrap tooling (Epic-004).
|
||||||
|
|
||||||
|
These tests exercise the bootstrap, skills audit, and dependency checker
|
||||||
|
without requiring network access or API keys.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# Ensure repo root importable
|
||||||
|
REPO_ROOT = Path(__file__).parent.parent
|
||||||
|
sys.path.insert(0, str(REPO_ROOT))
|
||||||
|
sys.path.insert(0, str(REPO_ROOT / "wizard-bootstrap"))
|
||||||
|
|
||||||
|
import wizard_bootstrap as wb
|
||||||
|
import skills_audit as sa
|
||||||
|
import dependency_checker as dc
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# wizard_bootstrap tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestCheckPythonVersion:
|
||||||
|
def test_current_python_passes(self):
|
||||||
|
result = wb.check_python_version()
|
||||||
|
assert result.passed
|
||||||
|
assert "Python" in result.message
|
||||||
|
|
||||||
|
def test_old_python_fails(self):
|
||||||
|
# Patch version_info as a tuple (matches [:3] unpacking used in the check)
|
||||||
|
old_info = sys.version_info
|
||||||
|
try:
|
||||||
|
sys.version_info = (3, 10, 0, "final", 0) # type: ignore[assignment]
|
||||||
|
result = wb.check_python_version()
|
||||||
|
finally:
|
||||||
|
sys.version_info = old_info # type: ignore[assignment]
|
||||||
|
assert not result.passed
|
||||||
|
|
||||||
|
|
||||||
|
class TestCheckCoreDeps:
|
||||||
|
def test_passes_when_all_present(self):
|
||||||
|
result = wb.check_core_deps()
|
||||||
|
# In a healthy dev environment all packages should be importable
|
||||||
|
assert result.passed
|
||||||
|
|
||||||
|
def test_fails_when_package_missing(self):
|
||||||
|
orig = __import__
|
||||||
|
|
||||||
|
def fake_import(name, *args, **kwargs):
|
||||||
|
if name == "openai":
|
||||||
|
raise ModuleNotFoundError(name)
|
||||||
|
return orig(name, *args, **kwargs)
|
||||||
|
|
||||||
|
with mock.patch("builtins.__import__", side_effect=fake_import):
|
||||||
|
with mock.patch("importlib.import_module", side_effect=ModuleNotFoundError("openai")):
|
||||||
|
result = wb.check_core_deps()
|
||||||
|
# With mocked importlib the check should detect the missing module
|
||||||
|
assert not result.passed
|
||||||
|
assert "openai" in result.message
|
||||||
|
|
||||||
|
|
||||||
|
class TestCheckEnvVars:
|
||||||
|
def test_fails_when_no_key_set(self):
|
||||||
|
env_keys = [
|
||||||
|
"OPENROUTER_API_KEY", "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN",
|
||||||
|
"OPENAI_API_KEY", "GLM_API_KEY", "KIMI_API_KEY", "MINIMAX_API_KEY",
|
||||||
|
]
|
||||||
|
with mock.patch.dict(os.environ, {k: "" for k in env_keys}, clear=False):
|
||||||
|
# Remove all provider keys
|
||||||
|
env = {k: v for k, v in os.environ.items() if k not in env_keys}
|
||||||
|
with mock.patch.dict(os.environ, env, clear=True):
|
||||||
|
result = wb.check_env_vars()
|
||||||
|
assert not result.passed
|
||||||
|
|
||||||
|
def test_passes_when_key_set(self):
|
||||||
|
with mock.patch.dict(os.environ, {"ANTHROPIC_API_KEY": "sk-test-key"}):
|
||||||
|
result = wb.check_env_vars()
|
||||||
|
assert result.passed
|
||||||
|
assert "ANTHROPIC_API_KEY" in result.message
|
||||||
|
|
||||||
|
|
||||||
|
class TestCheckHermesHome:
|
||||||
|
def test_passes_with_existing_writable_dir(self, tmp_path):
|
||||||
|
with mock.patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
|
||||||
|
result = wb.check_hermes_home()
|
||||||
|
assert result.passed
|
||||||
|
|
||||||
|
def test_fails_when_dir_missing(self, tmp_path):
|
||||||
|
missing = tmp_path / "nonexistent"
|
||||||
|
with mock.patch.dict(os.environ, {"HERMES_HOME": str(missing)}):
|
||||||
|
result = wb.check_hermes_home()
|
||||||
|
assert not result.passed
|
||||||
|
|
||||||
|
|
||||||
|
class TestBootstrapReport:
|
||||||
|
def test_passed_when_all_pass(self):
|
||||||
|
report = wb.BootstrapReport()
|
||||||
|
report.add(wb.CheckResult("a", True, "ok"))
|
||||||
|
report.add(wb.CheckResult("b", True, "ok"))
|
||||||
|
assert report.passed
|
||||||
|
assert report.failed == []
|
||||||
|
|
||||||
|
def test_failed_when_any_fail(self):
|
||||||
|
report = wb.BootstrapReport()
|
||||||
|
report.add(wb.CheckResult("a", True, "ok"))
|
||||||
|
report.add(wb.CheckResult("b", False, "bad", fix_hint="fix it"))
|
||||||
|
assert not report.passed
|
||||||
|
assert len(report.failed) == 1
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# skills_audit tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestSkillsAudit:
|
||||||
|
def _make_skill(self, skills_root: Path, rel_path: str, content: str = "# skill") -> Path:
|
||||||
|
"""Create a SKILL.md at skills_root/rel_path/SKILL.md."""
|
||||||
|
skill_dir = skills_root / rel_path
|
||||||
|
skill_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
skill_md = skill_dir / "SKILL.md"
|
||||||
|
skill_md.write_text(content)
|
||||||
|
return skill_md
|
||||||
|
|
||||||
|
def test_no_drift_when_identical(self, tmp_path):
|
||||||
|
# run_audit expects repo_root/skills/ and installed_root/
|
||||||
|
repo = tmp_path / "repo"
|
||||||
|
installed = tmp_path / "installed"
|
||||||
|
content = "# Same content"
|
||||||
|
self._make_skill(repo / "skills", "cat/skill-a", content)
|
||||||
|
self._make_skill(installed, "cat/skill-a", content)
|
||||||
|
|
||||||
|
report = sa.run_audit(repo, installed)
|
||||||
|
assert not report.has_drift
|
||||||
|
assert len(report.by_status("OK")) == 1
|
||||||
|
|
||||||
|
def test_detects_missing_skill(self, tmp_path):
|
||||||
|
repo = tmp_path / "repo"
|
||||||
|
installed = tmp_path / "installed"
|
||||||
|
installed.mkdir()
|
||||||
|
self._make_skill(repo / "skills", "cat/skill-a")
|
||||||
|
|
||||||
|
report = sa.run_audit(repo, installed)
|
||||||
|
assert report.has_drift
|
||||||
|
assert len(report.by_status("MISSING")) == 1
|
||||||
|
|
||||||
|
def test_detects_extra_skill(self, tmp_path):
|
||||||
|
repo = tmp_path / "repo"
|
||||||
|
(repo / "skills").mkdir(parents=True)
|
||||||
|
installed = tmp_path / "installed"
|
||||||
|
self._make_skill(installed, "cat/skill-a")
|
||||||
|
|
||||||
|
report = sa.run_audit(repo, installed)
|
||||||
|
assert report.has_drift
|
||||||
|
assert len(report.by_status("EXTRA")) == 1
|
||||||
|
|
||||||
|
def test_detects_outdated_skill(self, tmp_path):
|
||||||
|
repo = tmp_path / "repo"
|
||||||
|
installed = tmp_path / "installed"
|
||||||
|
self._make_skill(repo / "skills", "cat/skill-a", "# Repo version")
|
||||||
|
self._make_skill(installed, "cat/skill-a", "# Installed version")
|
||||||
|
|
||||||
|
report = sa.run_audit(repo, installed)
|
||||||
|
assert report.has_drift
|
||||||
|
assert len(report.by_status("OUTDATED")) == 1
|
||||||
|
|
||||||
|
def test_fix_copies_missing_skills(self, tmp_path):
|
||||||
|
repo = tmp_path / "repo"
|
||||||
|
installed = tmp_path / "installed"
|
||||||
|
installed.mkdir()
|
||||||
|
self._make_skill(repo / "skills", "cat/skill-a", "# content")
|
||||||
|
|
||||||
|
report = sa.run_audit(repo, installed)
|
||||||
|
assert len(report.by_status("MISSING")) == 1
|
||||||
|
|
||||||
|
sa.apply_fix(report)
|
||||||
|
|
||||||
|
report2 = sa.run_audit(repo, installed)
|
||||||
|
assert not report2.has_drift
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# dependency_checker tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestDependencyChecker:
|
||||||
|
def _make_skill(self, root: Path, rel_path: str, content: str) -> None:
|
||||||
|
skill_dir = root / rel_path
|
||||||
|
skill_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
(skill_dir / "SKILL.md").write_text(content)
|
||||||
|
|
||||||
|
def test_no_deps_when_no_frontmatter(self, tmp_path):
|
||||||
|
self._make_skill(tmp_path, "cat/plain", "# No frontmatter")
|
||||||
|
report = dc.run_dep_check(skills_dir=tmp_path)
|
||||||
|
assert report.deps == []
|
||||||
|
|
||||||
|
def test_detects_missing_binary(self, tmp_path):
|
||||||
|
content = "---\nname: test\ndependencies:\n binaries: [definitely_not_a_real_binary_xyz]\n---\n"
|
||||||
|
self._make_skill(tmp_path, "cat/skill", content)
|
||||||
|
report = dc.run_dep_check(skills_dir=tmp_path)
|
||||||
|
assert len(report.deps) == 1
|
||||||
|
assert not report.deps[0].satisfied
|
||||||
|
assert report.deps[0].binary == "definitely_not_a_real_binary_xyz"
|
||||||
|
|
||||||
|
def test_detects_present_binary(self, tmp_path):
|
||||||
|
content = "---\nname: test\ndependencies:\n binaries: [python3]\n---\n"
|
||||||
|
self._make_skill(tmp_path, "cat/skill", content)
|
||||||
|
report = dc.run_dep_check(skills_dir=tmp_path)
|
||||||
|
assert len(report.deps) == 1
|
||||||
|
assert report.deps[0].satisfied
|
||||||
|
|
||||||
|
def test_detects_missing_env_var(self, tmp_path):
|
||||||
|
content = "---\nname: test\ndependencies:\n env_vars: [DEFINITELY_NOT_SET_XYZ_123]\n---\n"
|
||||||
|
self._make_skill(tmp_path, "cat/skill", content)
|
||||||
|
env = {k: v for k, v in os.environ.items() if k != "DEFINITELY_NOT_SET_XYZ_123"}
|
||||||
|
with mock.patch.dict(os.environ, env, clear=True):
|
||||||
|
report = dc.run_dep_check(skills_dir=tmp_path)
|
||||||
|
assert len(report.deps) == 1
|
||||||
|
assert not report.deps[0].satisfied
|
||||||
|
|
||||||
|
def test_detects_present_env_var(self, tmp_path):
|
||||||
|
content = "---\nname: test\ndependencies:\n env_vars: [MY_TEST_VAR_WIZARD]\n---\n"
|
||||||
|
self._make_skill(tmp_path, "cat/skill", content)
|
||||||
|
with mock.patch.dict(os.environ, {"MY_TEST_VAR_WIZARD": "set"}):
|
||||||
|
report = dc.run_dep_check(skills_dir=tmp_path)
|
||||||
|
assert len(report.deps) == 1
|
||||||
|
assert report.deps[0].satisfied
|
||||||
|
|
||||||
|
def test_skill_filter(self, tmp_path):
|
||||||
|
content = "---\nname: test\ndependencies:\n binaries: [python3]\n---\n"
|
||||||
|
self._make_skill(tmp_path, "cat/skill-a", content)
|
||||||
|
self._make_skill(tmp_path, "cat/skill-b", content)
|
||||||
|
|
||||||
|
report = dc.run_dep_check(skills_dir=tmp_path, skill_filter="skill-a")
|
||||||
|
assert len(report.deps) == 1
|
||||||
|
assert "skill-a" in report.deps[0].skill_path
|
||||||
215
wizard-bootstrap/FORGE_OPERATIONS_GUIDE.md
Normal file
215
wizard-bootstrap/FORGE_OPERATIONS_GUIDE.md
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
# Forge Operations Guide
|
||||||
|
|
||||||
|
> **Audience:** Forge wizards joining the hermes-agent project
|
||||||
|
> **Purpose:** Practical patterns, common pitfalls, and operational wisdom
|
||||||
|
> **Companion to:** `WIZARD_ENVIRONMENT_CONTRACT.md`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## The One Rule
|
||||||
|
|
||||||
|
**Read the actual state before acting.**
|
||||||
|
|
||||||
|
Before touching any service, config, or codebase: `ps aux | grep hermes`, `cat ~/.hermes/gateway_state.json`, `curl http://127.0.0.1:8642/health`. The forge punishes assumptions harder than it rewards speed. Evidence always beats intuition.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## First 15 Minutes on a New System
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Validate your environment
|
||||||
|
python wizard-bootstrap/wizard_bootstrap.py
|
||||||
|
|
||||||
|
# 2. Check what is actually running
|
||||||
|
ps aux | grep -E 'hermes|python|gateway'
|
||||||
|
|
||||||
|
# 3. Check the data directory
|
||||||
|
ls -la ~/.hermes/
|
||||||
|
cat ~/.hermes/gateway_state.json 2>/dev/null | python3 -m json.tool
|
||||||
|
|
||||||
|
# 4. Verify health endpoints (if gateway is up)
|
||||||
|
curl -sf http://127.0.0.1:8642/health | python3 -m json.tool
|
||||||
|
|
||||||
|
# 5. Run the smoke test
|
||||||
|
source venv/bin/activate
|
||||||
|
python -m pytest tests/ -q -x --timeout=60 2>&1 | tail -20
|
||||||
|
```
|
||||||
|
|
||||||
|
Do not begin work until all five steps return clean output.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Import Chain — Know It, Respect It
|
||||||
|
|
||||||
|
The dependency order is load-bearing. Violating it causes silent failures:
|
||||||
|
|
||||||
|
```
|
||||||
|
tools/registry.py ← no deps; imported by everything
|
||||||
|
↑
|
||||||
|
tools/*.py ← each calls registry.register() at import time
|
||||||
|
↑
|
||||||
|
model_tools.py ← imports registry; triggers tool discovery
|
||||||
|
↑
|
||||||
|
run_agent.py / cli.py / batch_runner.py
|
||||||
|
```
|
||||||
|
|
||||||
|
**If you add a tool file**, you must also:
|
||||||
|
1. Add its import to `model_tools.py` `_discover_tools()`
|
||||||
|
2. Add it to `toolsets.py` (core or a named toolset)
|
||||||
|
|
||||||
|
Missing either step causes the tool to silently not appear — no error, just absence.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## The Five Profile Rules
|
||||||
|
|
||||||
|
Hermes supports isolated profiles (`hermes -p myprofile`). Profile-unsafe code has caused repeated bugs. Memorize these:
|
||||||
|
|
||||||
|
| Do this | Not this |
|
||||||
|
|---------|----------|
|
||||||
|
| `get_hermes_home()` | `Path.home() / ".hermes"` |
|
||||||
|
| `display_hermes_home()` in user messages | hardcoded `~/.hermes` strings |
|
||||||
|
| `get_hermes_home() / "sessions"` in tests | `~/.hermes/sessions` in tests |
|
||||||
|
|
||||||
|
Import both from `hermes_constants`. Every `~/.hermes` hardcode is a latent profile bug.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Prompt Caching — Do Not Break It
|
||||||
|
|
||||||
|
The agent caches system prompts. Cache breaks force re-billing of the entire context window on every turn. The following actions break caching mid-conversation and are forbidden:
|
||||||
|
|
||||||
|
- Altering past context
|
||||||
|
- Changing the active toolset
|
||||||
|
- Reloading memories or rebuilding the system prompt
|
||||||
|
|
||||||
|
The only sanctioned context alteration is the context compressor (`agent/context_compressor.py`). If your feature touches the message history, read that file first.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Adding a Slash Command (Checklist)
|
||||||
|
|
||||||
|
Four files, in order:
|
||||||
|
|
||||||
|
1. **`hermes_cli/commands.py`** — add `CommandDef` to `COMMAND_REGISTRY`
|
||||||
|
2. **`cli.py`** — add handler branch in `HermesCLI.process_command()`
|
||||||
|
3. **`gateway/run.py`** — add handler if it should work in messaging platforms
|
||||||
|
4. **Aliases** — add to the `aliases` tuple on the `CommandDef`; everything else updates automatically
|
||||||
|
|
||||||
|
All downstream consumers (Telegram menu, Slack routing, autocomplete, help text) derive from `COMMAND_REGISTRY`. You never touch them directly.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Tool Schema Pitfalls
|
||||||
|
|
||||||
|
**Do NOT cross-reference other toolsets in schema descriptions.**
|
||||||
|
Writing "prefer `web_search` over this tool" in a browser tool's description will cause the model to hallucinate calls to `web_search` when it's not loaded. Cross-references belong in `get_tool_definitions()` post-processing blocks in `model_tools.py`.
|
||||||
|
|
||||||
|
**Do NOT use `\033[K` (ANSI erase-to-EOL) in display code.**
|
||||||
|
Under `prompt_toolkit`'s `patch_stdout`, it leaks as literal `?[K`. Use space-padding instead: `f"\r{line}{' ' * pad}"`.
|
||||||
|
|
||||||
|
**Do NOT use `simple_term_menu` for interactive menus.**
|
||||||
|
It ghosts on scroll in tmux/iTerm2. Use `curses` (stdlib). See `hermes_cli/tools_config.py` for the pattern.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Health Check Anatomy
|
||||||
|
|
||||||
|
A healthy instance returns:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"status": "ok",
|
||||||
|
"gateway_state": "running",
|
||||||
|
"platforms": {
|
||||||
|
"telegram": {"state": "connected"}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
| Field | Healthy value | What a bad value means |
|
||||||
|
|-------|--------------|----------------------|
|
||||||
|
| `status` | `"ok"` | HTTP server down |
|
||||||
|
| `gateway_state` | `"running"` | Still starting or crashed |
|
||||||
|
| `platforms.<name>.state` | `"connected"` | Auth failure or network issue |
|
||||||
|
|
||||||
|
`gateway_state: "starting"` is normal for up to 60 s on boot. Beyond that, check logs for auth errors:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
journalctl -u hermes-gateway --since "2 minutes ago" | grep -i "error\|token\|auth"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Gateway Won't Start — Diagnosis Order
|
||||||
|
|
||||||
|
1. `ss -tlnp | grep 8642` — port conflict?
|
||||||
|
2. `cat ~/.hermes/gateway.pid` → `ps -p <pid>` — stale PID file?
|
||||||
|
3. `hermes gateway start --replace` — clears stale locks and PIDs
|
||||||
|
4. `HERMES_LOG_LEVEL=DEBUG hermes gateway start` — verbose output
|
||||||
|
5. Check `~/.hermes/.env` — missing or placeholder token?
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Before Every PR
|
||||||
|
|
||||||
|
```bash
|
||||||
|
source venv/bin/activate
|
||||||
|
python -m pytest tests/ -q # full suite: ~3 min, ~3000 tests
|
||||||
|
python scripts/deploy-validate # deployment health check
|
||||||
|
python wizard-bootstrap/wizard_bootstrap.py # environment sanity
|
||||||
|
```
|
||||||
|
|
||||||
|
All three must exit 0. Do not skip. "It works locally" is not sufficient evidence.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Session and State Files
|
||||||
|
|
||||||
|
| Store | Location | Notes |
|
||||||
|
|-------|----------|-------|
|
||||||
|
| Sessions | `~/.hermes/sessions/*.json` | Persisted across restarts |
|
||||||
|
| Memories | `~/.hermes/memories/*.md` | Written by the agent's memory tool |
|
||||||
|
| Cron jobs | `~/.hermes/cron/*.json` | Scheduler state |
|
||||||
|
| Gateway state | `~/.hermes/gateway_state.json` | Live platform connection status |
|
||||||
|
| Response store | `~/.hermes/response_store.db` | SQLite WAL — API server only |
|
||||||
|
|
||||||
|
All paths go through `get_hermes_home()`. Never hardcode. Always backup before a major update:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
tar czf ~/backups/hermes_$(date +%F_%H%M).tar.gz ~/.hermes/
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Writing Tests
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m pytest tests/path/to/test.py -q # single file
|
||||||
|
python -m pytest tests/ -q -k "test_name" # by name
|
||||||
|
python -m pytest tests/ -q -x # stop on first failure
|
||||||
|
```
|
||||||
|
|
||||||
|
**Test isolation rules:**
|
||||||
|
- `tests/conftest.py` has an autouse fixture that redirects `HERMES_HOME` to a temp dir. Never write to `~/.hermes/` in tests.
|
||||||
|
- Profile tests must mock both `Path.home()` and `HERMES_HOME`. See `tests/hermes_cli/test_profiles.py` for the pattern.
|
||||||
|
- Do not mock the database. Integration tests should use real SQLite with a temp path.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Commit Conventions
|
||||||
|
|
||||||
|
```
|
||||||
|
feat: add X # new capability
|
||||||
|
fix: correct Y # bug fix
|
||||||
|
refactor: restructure Z # no behaviour change
|
||||||
|
test: add tests for W # test-only
|
||||||
|
chore: update deps # housekeeping
|
||||||
|
docs: clarify X # documentation only
|
||||||
|
```
|
||||||
|
|
||||||
|
Include `Fixes #NNN` or `Refs #NNN` in the commit message body to close or reference issues automatically.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*This guide lives in `wizard-bootstrap/`. Update it when you discover a new pitfall or pattern worth preserving.*
|
||||||
162
wizard-bootstrap/WIZARD_ENVIRONMENT_CONTRACT.md
Normal file
162
wizard-bootstrap/WIZARD_ENVIRONMENT_CONTRACT.md
Normal file
@@ -0,0 +1,162 @@
|
|||||||
|
# Wizard Environment Contract
|
||||||
|
|
||||||
|
> **Version:** 1.0.0
|
||||||
|
> **Owner:** Wizard Council (Bezalel Epic-004)
|
||||||
|
> **Last updated:** 2026-04-06
|
||||||
|
|
||||||
|
This document defines the minimum viable state every forge wizard must maintain.
|
||||||
|
A wizard that satisfies all requirements is considered **forge-ready**.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Python Runtime
|
||||||
|
|
||||||
|
| Requirement | Minimum | Notes |
|
||||||
|
|-------------|---------|-------|
|
||||||
|
| Python version | 3.11 | 3.12+ recommended |
|
||||||
|
| Virtual environment | Activated | `source venv/bin/activate` before running |
|
||||||
|
|
||||||
|
Run `python --version` to verify.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Core Package Dependencies
|
||||||
|
|
||||||
|
All packages in `requirements.txt` must be installed and importable.
|
||||||
|
Critical packages: `openai`, `anthropic`, `pyyaml`, `rich`, `requests`, `pydantic`, `prompt_toolkit`.
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
```bash
|
||||||
|
python wizard-bootstrap/wizard_bootstrap.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. LLM Provider Key
|
||||||
|
|
||||||
|
At least one LLM provider API key must be set in `~/.hermes/.env`:
|
||||||
|
|
||||||
|
| Variable | Provider |
|
||||||
|
|----------|----------|
|
||||||
|
| `OPENROUTER_API_KEY` | OpenRouter (200+ models) |
|
||||||
|
| `ANTHROPIC_API_KEY` | Anthropic Claude |
|
||||||
|
| `ANTHROPIC_TOKEN` | Anthropic Claude (alt) |
|
||||||
|
| `OPENAI_API_KEY` | OpenAI |
|
||||||
|
| `GLM_API_KEY` | z.ai/GLM |
|
||||||
|
| `KIMI_API_KEY` | Moonshot/Kimi |
|
||||||
|
| `MINIMAX_API_KEY` | MiniMax |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. Gitea Authentication
|
||||||
|
|
||||||
|
| Requirement | Details |
|
||||||
|
|-------------|---------|
|
||||||
|
| Variable | `GITEA_TOKEN` or `FORGE_TOKEN` |
|
||||||
|
| Scope | Must have repo read/write access |
|
||||||
|
| Forge URL | `https://forge.alexanderwhitestone.com` (or `FORGE_URL` env var) |
|
||||||
|
|
||||||
|
The wizard must be able to create and merge PRs on the forge.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. Telegram Connectivity (Gateway Wizards)
|
||||||
|
|
||||||
|
Wizards that operate via the messaging gateway must also satisfy:
|
||||||
|
|
||||||
|
| Requirement | Details |
|
||||||
|
|-------------|---------|
|
||||||
|
| Variable | `TELEGRAM_BOT_TOKEN` |
|
||||||
|
| Home channel | `TELEGRAM_HOME_CHANNEL` |
|
||||||
|
| API reachability | `api.telegram.org` must be reachable |
|
||||||
|
|
||||||
|
CLI-only wizards may skip Telegram checks.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. HERMES_HOME
|
||||||
|
|
||||||
|
| Requirement | Details |
|
||||||
|
|-------------|---------|
|
||||||
|
| Default | `~/.hermes` |
|
||||||
|
| Override | `HERMES_HOME` env var |
|
||||||
|
| Permissions | Owner-writable (700 recommended) |
|
||||||
|
|
||||||
|
The directory must exist and be writable before any hermes command runs.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. Skill Dependencies (Per-Skill)
|
||||||
|
|
||||||
|
Each skill may declare binary and environment-variable dependencies in its
|
||||||
|
`SKILL.md` frontmatter:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
name: my-skill
|
||||||
|
dependencies:
|
||||||
|
binaries: [ffmpeg, imagemagick]
|
||||||
|
env_vars: [MY_API_KEY]
|
||||||
|
---
|
||||||
|
```
|
||||||
|
|
||||||
|
A wizard must satisfy all dependencies for any skill it intends to run.
|
||||||
|
|
||||||
|
**Check all skill deps:**
|
||||||
|
```bash
|
||||||
|
python wizard-bootstrap/dependency_checker.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. Enforcement
|
||||||
|
|
||||||
|
### New Wizard Onboarding
|
||||||
|
|
||||||
|
Run the bootstrap script before going online:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python wizard-bootstrap/wizard_bootstrap.py
|
||||||
|
```
|
||||||
|
|
||||||
|
Resolve all failures before beginning work.
|
||||||
|
|
||||||
|
### Ongoing Compliance
|
||||||
|
|
||||||
|
A monthly audit runs automatically (see `wizard-bootstrap/monthly_audit.py`).
|
||||||
|
The report is saved to `~/.hermes/wizard-council/audit-YYYY-MM.md` and posted
|
||||||
|
to the `wizard-council-automation` Telegram channel.
|
||||||
|
|
||||||
|
### Skill Drift
|
||||||
|
|
||||||
|
Run the skills audit to detect and fix drift:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python wizard-bootstrap/skills_audit.py # detect
|
||||||
|
python wizard-bootstrap/skills_audit.py --fix # sync
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. Contract Versioning
|
||||||
|
|
||||||
|
Changes to this contract require a PR reviewed by at least one wizard council
|
||||||
|
member. Bump the version number and update the date above with each change.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick Reference
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Full environment validation
|
||||||
|
python wizard-bootstrap/wizard_bootstrap.py
|
||||||
|
|
||||||
|
# Skills drift check
|
||||||
|
python wizard-bootstrap/skills_audit.py
|
||||||
|
|
||||||
|
# Dependency check
|
||||||
|
python wizard-bootstrap/dependency_checker.py
|
||||||
|
|
||||||
|
# Full monthly audit (all three checks, saves report)
|
||||||
|
python wizard-bootstrap/monthly_audit.py
|
||||||
|
```
|
||||||
1
wizard-bootstrap/__init__.py
Normal file
1
wizard-bootstrap/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# wizard-bootstrap package
|
||||||
300
wizard-bootstrap/dependency_checker.py
Normal file
300
wizard-bootstrap/dependency_checker.py
Normal file
@@ -0,0 +1,300 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
dependency_checker.py — Cross-Wizard Dependency Validator
|
||||||
|
|
||||||
|
Each skill may declare binary or environment-variable dependencies in its
|
||||||
|
SKILL.md frontmatter under a `dependencies` key:
|
||||||
|
|
||||||
|
---
|
||||||
|
name: my-skill
|
||||||
|
dependencies:
|
||||||
|
binaries: [ffmpeg, imagemagick]
|
||||||
|
env_vars: [MY_API_KEY, MY_SECRET]
|
||||||
|
---
|
||||||
|
|
||||||
|
This script scans all installed skills, extracts declared dependencies, and
|
||||||
|
checks whether each is satisfied in the current environment.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python wizard-bootstrap/dependency_checker.py
|
||||||
|
python wizard-bootstrap/dependency_checker.py --json
|
||||||
|
python wizard-bootstrap/dependency_checker.py --skill software-development/code-review
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import sys
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
try:
|
||||||
|
import yaml
|
||||||
|
HAS_YAML = True
|
||||||
|
except ImportError:
|
||||||
|
HAS_YAML = False
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Data model
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass
class SkillDep:
    """One declared dependency (a binary or an env var) for a single skill."""

    # Skill path relative to the skills root, e.g. "software-development/code-review".
    skill_path: str
    # Name from SKILL.md frontmatter (callers fall back to the directory name).
    skill_name: str
    # Exactly one of `binary` / `env_var` is set, marking the dependency kind.
    binary: Optional[str] = None
    env_var: Optional[str] = None
    # Whether the dependency was found in the current environment.
    satisfied: bool = False
    # Human-readable detail, e.g. "found at /usr/bin/ffmpeg" or "not set".
    detail: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class DepReport:
    """Aggregate result of a dependency scan across all skills."""

    # Every checked dependency: one SkillDep per (skill, binary/env_var) pair.
    deps: list[SkillDep] = field(default_factory=list)

    @property
    def all_satisfied(self) -> bool:
        """True when every dependency is satisfied (vacuously True for no deps)."""
        return all(d.satisfied for d in self.deps)

    @property
    def unsatisfied(self) -> list[SkillDep]:
        """The subset of dependencies that are not satisfied."""
        return [d for d in self.deps if not d.satisfied]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Frontmatter parser
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _parse_frontmatter(text: str) -> dict:
|
||||||
|
"""Extract YAML frontmatter from a SKILL.md file."""
|
||||||
|
if not text.startswith("---"):
|
||||||
|
return {}
|
||||||
|
end = text.find("\n---", 3)
|
||||||
|
if end == -1:
|
||||||
|
return {}
|
||||||
|
fm_text = text[3:end].strip()
|
||||||
|
if not HAS_YAML:
|
||||||
|
return {}
|
||||||
|
try:
|
||||||
|
return yaml.safe_load(fm_text) or {}
|
||||||
|
except Exception:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _load_skill_deps(skill_md: Path) -> tuple[str, list[str], list[str]]:
    """Read a SKILL.md and return (skill_name, binaries, env_vars).

    The name falls back to the skill's directory name when the frontmatter
    has no ``name`` key. A scalar dependency entry is promoted to a
    one-element list; a malformed ``dependencies`` block yields empty lists.
    """
    frontmatter = _parse_frontmatter(
        skill_md.read_text(encoding="utf-8", errors="replace")
    )
    name = frontmatter.get("name", skill_md.parent.name)

    declared = frontmatter.get("dependencies", {})
    if not isinstance(declared, dict):
        return name, [], []

    def _as_list(value) -> list[str]:
        # None / "" / [] all normalize to []; a bare string becomes [string].
        if not value:
            return []
        if isinstance(value, str):
            return [value]
        return list(value)

    return name, _as_list(declared.get("binaries")), _as_list(declared.get("env_vars"))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Checks
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _check_binary(binary: str) -> tuple[bool, str]:
|
||||||
|
path = shutil.which(binary)
|
||||||
|
if path:
|
||||||
|
return True, f"found at {path}"
|
||||||
|
return False, f"not found in PATH"
|
||||||
|
|
||||||
|
|
||||||
|
def _check_env_var(var: str) -> tuple[bool, str]:
|
||||||
|
val = os.environ.get(var)
|
||||||
|
if val:
|
||||||
|
return True, "set"
|
||||||
|
return False, "not set"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Scanner
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _find_skills_dir() -> Optional[Path]:
    """Locate the skills directory to scan.

    Preference order: the repo's own skills/ (sibling of this package),
    then HERMES_HOME/skills, then HERMES_HOME/hermes-agent/skills.
    Returns None when none of them exist.
    """
    home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
    candidates = (
        Path(__file__).parent.parent / "skills",
        home / "skills",
        home / "hermes-agent" / "skills",
    )
    for location in candidates:
        if location.exists():
            return location
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def run_dep_check(skills_dir: Optional[Path] = None, skill_filter: Optional[str] = None) -> DepReport:
    """Scan all installed skills and check their declared dependencies.

    Args:
        skills_dir: Directory to scan; auto-detected when None.
        skill_filter: When given, only skills whose relative path contains
            this substring are checked.

    Returns:
        A DepReport with one SkillDep entry per declared binary/env var.
        The report is empty when no skills directory can be found.
    """
    resolved = skills_dir or _find_skills_dir()
    report = DepReport()

    if resolved is None or not resolved.exists():
        return report

    # Load ~/.hermes/.env so env var checks reflect the gateway's config.
    # Best-effort: a missing python-dotenv package is silently ignored.
    hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
    env_path = hermes_home / ".env"
    if env_path.exists():
        try:
            from dotenv import load_dotenv  # noqa: PLC0415
            load_dotenv(env_path, override=False)
        except Exception:
            pass

    # sorted() keeps report/output ordering deterministic across runs.
    for skill_md in sorted(resolved.rglob("SKILL.md")):
        rel = str(skill_md.parent.relative_to(resolved))
        if skill_filter and skill_filter not in rel:
            continue

        skill_name, binaries, env_vars = _load_skill_deps(skill_md)

        for binary in binaries:
            ok, detail = _check_binary(binary)
            report.deps.append(SkillDep(
                skill_path=rel,
                skill_name=skill_name,
                binary=binary,
                satisfied=ok,
                detail=detail,
            ))

        for var in env_vars:
            ok, detail = _check_env_var(var)
            report.deps.append(SkillDep(
                skill_path=rel,
                skill_name=skill_name,
                env_var=var,
                satisfied=ok,
                detail=detail,
            ))

    return report
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Rendering
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_GREEN = "\033[32m"
|
||||||
|
_RED = "\033[31m"
|
||||||
|
_YELLOW = "\033[33m"
|
||||||
|
_BOLD = "\033[1m"
|
||||||
|
_RESET = "\033[0m"
|
||||||
|
|
||||||
|
|
||||||
|
def _render_terminal(report: DepReport) -> None:
    """Pretty-print the dependency report to the terminal with ANSI colors.

    Prints a tip (and returns early) when no skill declares dependencies;
    otherwise one line per dependency followed by a pass/fail summary.
    """
    print(f"\n{_BOLD}=== Cross-Wizard Dependency Check ==={_RESET}\n")

    if not report.deps:
        print("No skill dependencies declared. Skills use implicit deps only.\n")
        print(
            f"{_YELLOW}Tip:{_RESET} Declare binary/env_var deps in SKILL.md frontmatter "
            "under a 'dependencies' key to make them checkable.\n"
        )
        return

    for dep in report.deps:
        # Green check for satisfied, red cross for unsatisfied.
        icon = f"{_GREEN}✓{_RESET}" if dep.satisfied else f"{_RED}✗{_RESET}"
        if dep.binary:
            dep_type = "binary"
            dep_name = dep.binary
        else:
            dep_type = "env_var"
            dep_name = dep.env_var

        print(f"  {icon} [{dep.skill_path}] {dep_type}:{dep_name} — {dep.detail}")

    total = len(report.deps)
    satisfied = sum(1 for d in report.deps if d.satisfied)
    print()
    if report.all_satisfied:
        print(f"{_GREEN}{_BOLD}All {total} dependencies satisfied.{_RESET}\n")
    else:
        failed = total - satisfied
        print(
            f"{_RED}{_BOLD}{failed}/{total} dependencies unsatisfied.{_RESET} "
            "Install missing binaries and set missing env vars.\n"
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _render_json(report: DepReport) -> None:
    """Emit the dependency report as indented JSON on stdout."""
    entries = []
    for dep in report.deps:
        kind = "binary" if dep.binary else "env_var"
        entries.append(
            {
                "skill_path": dep.skill_path,
                "skill_name": dep.skill_name,
                "type": kind,
                "name": dep.binary or dep.env_var,
                "satisfied": dep.satisfied,
                "detail": dep.detail,
            }
        )

    satisfied_total = sum(d.satisfied for d in report.deps)
    payload = {
        "all_satisfied": report.all_satisfied,
        "summary": {
            "total": len(report.deps),
            "satisfied": satisfied_total,
            "unsatisfied": len(report.unsatisfied),
        },
        "deps": entries,
    }
    print(json.dumps(payload, indent=2))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI entry point
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: parse args, run the dependency check, render, exit.

    Exit status is 0 when every declared dependency is satisfied and 1
    otherwise, so the script can gate CI jobs or bootstrap flows.
    """
    # pyyaml is optional; without it frontmatter (and therefore every
    # declared dependency) is invisible, so warn loudly up front.
    if not HAS_YAML:
        print("WARNING: pyyaml not installed — cannot parse SKILL.md frontmatter. "
              "Dependency declarations will be skipped.", file=sys.stderr)

    parser = argparse.ArgumentParser(
        description="Check cross-wizard skill dependencies (binaries, env vars)."
    )
    parser.add_argument(
        "--skills-dir",
        default=None,
        help="Skills directory to scan (default: auto-detect)",
    )
    parser.add_argument(
        "--skill",
        default=None,
        help="Filter to a specific skill path substring",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output results as JSON",
    )
    args = parser.parse_args()

    skills_dir = Path(args.skills_dir).resolve() if args.skills_dir else None
    report = run_dep_check(skills_dir=skills_dir, skill_filter=args.skill)

    if args.json:
        _render_json(report)
    else:
        _render_terminal(report)

    # Non-zero exit signals unsatisfied dependencies to callers/CI.
    sys.exit(0 if report.all_satisfied else 1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
259
wizard-bootstrap/monthly_audit.py
Normal file
259
wizard-bootstrap/monthly_audit.py
Normal file
@@ -0,0 +1,259 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
monthly_audit.py — Wizard Council Monthly Environment Audit
|
||||||
|
|
||||||
|
Runs all three checks (bootstrap, skills audit, dependency check) and
|
||||||
|
produces a combined Markdown report. Designed to be invoked by cron or
|
||||||
|
manually.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python wizard-bootstrap/monthly_audit.py
|
||||||
|
python wizard-bootstrap/monthly_audit.py --output /path/to/report.md
|
||||||
|
python wizard-bootstrap/monthly_audit.py --post-telegram # post to configured channel
|
||||||
|
|
||||||
|
The report is also written to ~/.hermes/wizard-council/audit-YYYY-MM.md
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from contextlib import redirect_stdout
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Ensure repo root is importable
|
||||||
|
_REPO_ROOT = Path(__file__).parent.parent
|
||||||
|
sys.path.insert(0, str(_REPO_ROOT))
|
||||||
|
|
||||||
|
from wizard_bootstrap import run_all_checks
|
||||||
|
from skills_audit import run_audit
|
||||||
|
from dependency_checker import run_dep_check
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Report builder
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _emoji(ok: bool) -> str:
|
||||||
|
return "✅" if ok else "❌"
|
||||||
|
|
||||||
|
|
||||||
|
def build_report(repo_root: Path) -> str:
    """Build the combined Markdown audit report.

    Runs the three checks (environment bootstrap, skills drift audit,
    cross-wizard dependency check) and renders one Markdown document with
    a per-check section plus an overall summary table.

    Args:
        repo_root: Root of the hermes-agent repo (passed to the skills audit).

    Returns:
        The full report as a single Markdown string.
    """
    now = datetime.now(timezone.utc)
    # Fixed: several header/table rows were f-strings with no placeholders
    # (ruff F541); they are plain string literals now.
    lines = [
        "# Wizard Council Environment Audit",
        "",
        f"**Date:** {now.strftime('%Y-%m-%d %H:%M UTC')}",
        "",
        "---",
        "",
    ]

    # 1. Bootstrap checks
    lines.append("## 1. Environment Bootstrap")
    lines.append("")
    bootstrap = run_all_checks()
    for check in bootstrap.checks:
        icon = _emoji(check.passed)
        label = check.name.replace("_", " ").title()
        lines.append(f"- {icon} **{label}**: {check.message}")
        # Surface the remediation hint right under the failing check.
        if not check.passed and check.fix_hint:
            lines.append(f"  - _Fix_: {check.fix_hint}")
    lines.append("")
    if bootstrap.passed:
        lines.append("**Environment: READY** ✅")
    else:
        failed = len(bootstrap.failed)
        lines.append(f"**Environment: {failed} check(s) FAILED** ❌")
    lines.append("")
    lines.append("---")
    lines.append("")

    # 2. Skills audit
    lines.append("## 2. Skills Drift Audit")
    lines.append("")
    skills_report = run_audit(repo_root)
    missing = skills_report.by_status("MISSING")
    extra = skills_report.by_status("EXTRA")
    outdated = skills_report.by_status("OUTDATED")
    ok_count = len(skills_report.by_status("OK"))
    total = len(skills_report.drifts)

    lines.append("| Status | Count |")
    lines.append("|--------|-------|")
    lines.append(f"| ✅ OK | {ok_count} |")
    lines.append(f"| ❌ Missing | {len(missing)} |")
    lines.append(f"| ⚠️ Extra | {len(extra)} |")
    lines.append(f"| 🔄 Outdated | {len(outdated)} |")
    lines.append(f"| **Total** | **{total}** |")
    lines.append("")

    if missing:
        lines.append("### Missing Skills (in repo, not installed)")
        for d in missing:
            lines.append(f"- `{d.skill_path}`")
        lines.append("")

    if outdated:
        lines.append("### Outdated Skills")
        for d in outdated:
            lines.append(f"- `{d.skill_path}` (repo: `{d.repo_hash}`, installed: `{d.installed_hash}`)")
        lines.append("")

    if extra:
        lines.append("### Extra Skills (installed, not in repo)")
        for d in extra:
            lines.append(f"- `{d.skill_path}`")
        lines.append("")

    if not skills_report.has_drift:
        lines.append("**Skills: IN SYNC** ✅")
    else:
        lines.append("**Skills: DRIFT DETECTED** ❌ — run `python wizard-bootstrap/skills_audit.py --fix`")
    lines.append("")
    lines.append("---")
    lines.append("")

    # 3. Dependency check
    lines.append("## 3. Cross-Wizard Dependency Check")
    lines.append("")
    dep_report = run_dep_check()

    if not dep_report.deps:
        lines.append("No explicit dependencies declared in SKILL.md frontmatter.")
        lines.append("")
        lines.append(
            "_Tip: Add a `dependencies` block to SKILL.md to make binary/env_var "
            "requirements checkable automatically._"
        )
    else:
        satisfied = sum(1 for d in dep_report.deps if d.satisfied)
        total_deps = len(dep_report.deps)
        lines.append(f"**{satisfied}/{total_deps} dependencies satisfied.**")
        lines.append("")
        if dep_report.unsatisfied:
            lines.append("### Unsatisfied Dependencies")
            for dep in dep_report.unsatisfied:
                dep_type = "binary" if dep.binary else "env_var"
                dep_name = dep.binary or dep.env_var
                lines.append(f"- `[{dep.skill_path}]` {dep_type}:`{dep_name}` — {dep.detail}")
            lines.append("")

    if dep_report.all_satisfied:
        lines.append("**Dependencies: ALL SATISFIED** ✅")
    else:
        lines.append("**Dependencies: ISSUES FOUND** ❌")
    lines.append("")
    lines.append("---")
    lines.append("")

    # Summary: overall status is the conjunction of the three checks.
    overall_ok = bootstrap.passed and not skills_report.has_drift and dep_report.all_satisfied
    lines.append("## Summary")
    lines.append("")
    lines.append("| Check | Status |")
    lines.append("|-------|--------|")
    lines.append(f"| Environment Bootstrap | {_emoji(bootstrap.passed)} |")
    lines.append(f"| Skills Drift | {_emoji(not skills_report.has_drift)} |")
    lines.append(f"| Dependency Check | {_emoji(dep_report.all_satisfied)} |")
    lines.append("")
    if overall_ok:
        lines.append("**Overall: FORGE READY** ✅")
    else:
        lines.append("**Overall: ACTION REQUIRED** ❌")
    lines.append("")

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Output / delivery
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _save_report(report: str, output_path: Path) -> None:
|
||||||
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
output_path.write_text(report, encoding="utf-8")
|
||||||
|
print(f"Report saved to: {output_path}")
|
||||||
|
|
||||||
|
|
||||||
|
def _post_telegram(report: str) -> None:
    """Post the report summary to Telegram via the Bot API, if configured.

    Requires TELEGRAM_BOT_TOKEN plus TELEGRAM_HOME_CHANNEL (or the legacy
    TELEGRAM_CHANNEL_ID). Only the "## Summary" section is sent to keep the
    message brief; failures are printed to stderr and never raised.
    """
    token = os.environ.get("TELEGRAM_BOT_TOKEN")
    channel = os.environ.get("TELEGRAM_HOME_CHANNEL") or os.environ.get("TELEGRAM_CHANNEL_ID")
    if not (token and channel):
        print("Telegram not configured (need TELEGRAM_BOT_TOKEN + TELEGRAM_HOME_CHANNEL).", file=sys.stderr)
        return

    try:
        import requests  # noqa: PLC0415

        # Extract just the summary section for Telegram (keep it brief)
        summary_start = report.find("## Summary")
        summary_text = report[summary_start:] if summary_start != -1 else report[-1000:]
        payload = {
            "chat_id": channel,
            # Fixed: Telegram's legacy "Markdown" parse mode uses single
            # asterisks for bold; "**...**" was rendered literally.
            "text": f"🧙 *Wizard Council Monthly Audit*\n\n{summary_text}",
            "parse_mode": "Markdown",
        }
        resp = requests.post(
            f"https://api.telegram.org/bot{token}/sendMessage",
            json=payload,
            timeout=15,
        )
        if resp.status_code == 200:
            print("Report summary posted to Telegram.")
        else:
            print(f"Telegram post failed: HTTP {resp.status_code}", file=sys.stderr)
    except Exception as exc:
        print(f"Telegram post error: {exc}", file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI entry point
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: build the audit report, print, save, optionally post.

    The report always goes to stdout and to disk; the default file location
    is ~/.hermes/wizard-council/audit-YYYY-MM.md (one file per month).
    """
    parser = argparse.ArgumentParser(
        description="Run the monthly Wizard Council environment audit."
    )
    parser.add_argument(
        "--output",
        default=None,
        help="Path to save the Markdown report (default: ~/.hermes/wizard-council/audit-YYYY-MM.md)",
    )
    parser.add_argument(
        "--repo-root",
        default=str(_REPO_ROOT),
        help="Root of the hermes-agent repo",
    )
    parser.add_argument(
        "--post-telegram",
        action="store_true",
        help="Post the report summary to Telegram",
    )
    args = parser.parse_args()

    repo_root = Path(args.repo_root).resolve()
    report = build_report(repo_root)

    # Print to stdout so cron logs capture the full report.
    print(report)

    # Save to the explicit --output path, or the default monthly location.
    now = datetime.now(timezone.utc)
    if args.output:
        output_path = Path(args.output)
    else:
        hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
        output_path = hermes_home / "wizard-council" / f"audit-{now.strftime('%Y-%m')}.md"

    _save_report(report, output_path)

    if args.post_telegram:
        _post_telegram(report)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
342
wizard-bootstrap/skills_audit.py
Normal file
342
wizard-bootstrap/skills_audit.py
Normal file
@@ -0,0 +1,342 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
skills_audit.py — Skills Drift Detector
|
||||||
|
|
||||||
|
Compares the skills bundled in the repo against those installed in
|
||||||
|
HERMES_HOME/skills/, then reports any drift:
|
||||||
|
|
||||||
|
- MISSING — skill in repo but not in installed location
|
||||||
|
- EXTRA — skill installed but not in repo (local-only)
|
||||||
|
- OUTDATED — repo skill.md differs from installed skill.md
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python wizard-bootstrap/skills_audit.py
|
||||||
|
python wizard-bootstrap/skills_audit.py --fix # copy missing skills
|
||||||
|
python wizard-bootstrap/skills_audit.py --json
|
||||||
|
python wizard-bootstrap/skills_audit.py --repo-root /path/to/hermes-agent
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import difflib
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import sys
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Data model
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass
class SkillDrift:
    """Drift record for a single skill, comparing its repo vs installed copy."""

    skill_path: str  # e.g. "software-development/code-review"
    status: str  # "MISSING" | "EXTRA" | "OUTDATED" | "OK"
    # Truncated SHA-256 of the repo copy (None when the skill is repo-absent).
    repo_hash: Optional[str] = None
    # Truncated SHA-256 of the installed copy (None when not installed).
    installed_hash: Optional[str] = None
    # Unified diff lines (installed -> repo); populated only for OUTDATED.
    diff_lines: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class AuditReport:
    """Full skills-audit result plus the two directories that were compared."""

    # One SkillDrift per skill found in either location (including OK ones).
    drifts: list[SkillDrift] = field(default_factory=list)
    repo_root: Path = Path(".")
    installed_root: Path = Path(".")

    @property
    def has_drift(self) -> bool:
        """True when any skill is not in the OK state."""
        return any(d.status != "OK" for d in self.drifts)

    def by_status(self, status: str) -> list[SkillDrift]:
        """Return all drift records with the given status."""
        return [d for d in self.drifts if d.status == status]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _sha256_file(path: Path) -> str:
|
||||||
|
h = hashlib.sha256()
|
||||||
|
h.update(path.read_bytes())
|
||||||
|
return h.hexdigest()[:16]
|
||||||
|
|
||||||
|
|
||||||
|
def _find_skills(root: Path) -> dict[str, Path]:
|
||||||
|
"""Return {relative_skill_path: SKILL.md path} for every skill under root."""
|
||||||
|
skills: dict[str, Path] = {}
|
||||||
|
for skill_md in root.rglob("SKILL.md"):
|
||||||
|
# skill path is relative to root, e.g. "software-development/code-review"
|
||||||
|
rel = skill_md.parent.relative_to(root)
|
||||||
|
skills[str(rel)] = skill_md
|
||||||
|
return skills
|
||||||
|
|
||||||
|
|
||||||
|
def _diff_skills(repo_md: Path, installed_md: Path) -> list[str]:
|
||||||
|
repo_lines = repo_md.read_text(encoding="utf-8", errors="replace").splitlines()
|
||||||
|
inst_lines = installed_md.read_text(encoding="utf-8", errors="replace").splitlines()
|
||||||
|
diff = list(
|
||||||
|
difflib.unified_diff(
|
||||||
|
inst_lines,
|
||||||
|
repo_lines,
|
||||||
|
fromfile="installed",
|
||||||
|
tofile="repo",
|
||||||
|
lineterm="",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return diff
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Core audit logic
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _resolve_installed_skills_root() -> Optional[Path]:
|
||||||
|
"""Return the installed skills directory, or None if not found."""
|
||||||
|
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
|
||||||
|
candidates = [
|
||||||
|
hermes_home / "skills",
|
||||||
|
hermes_home / "hermes-agent" / "skills",
|
||||||
|
]
|
||||||
|
for candidate in candidates:
|
||||||
|
if candidate.exists():
|
||||||
|
return candidate
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def run_audit(repo_root: Path, installed_root: Optional[Path] = None) -> AuditReport:
    """Compare repo skills against the installed set and classify drift.

    Args:
        repo_root: Repo checkout containing a top-level skills/ directory.
        installed_root: Installed skills directory; auto-detected when None.

    Returns:
        An AuditReport with one SkillDrift entry per skill found in either
        location: MISSING (repo only), EXTRA (installed only), OUTDATED
        (content hashes differ), or OK.
    """
    repo_skills_dir = repo_root / "skills"
    if not repo_skills_dir.exists():
        # NOTE(review): exits the whole process instead of raising; library
        # callers such as monthly_audit inherit this hard-exit behavior.
        print(f"ERROR: Repo skills directory not found: {repo_skills_dir}", file=sys.stderr)
        sys.exit(1)

    resolved_installed = installed_root or _resolve_installed_skills_root()
    report = AuditReport(
        repo_root=repo_root,
        # "/not-found" is a sentinel that apply_fix checks before copying.
        installed_root=resolved_installed or Path("/not-found"),
    )

    repo_map = _find_skills(repo_skills_dir)

    if resolved_installed is None or not resolved_installed.exists():
        # All repo skills are "MISSING" from the installation
        for skill_path in sorted(repo_map):
            report.drifts.append(
                SkillDrift(
                    skill_path=skill_path,
                    status="MISSING",
                    repo_hash=_sha256_file(repo_map[skill_path]),
                )
            )
        return report

    installed_map = _find_skills(resolved_installed)

    # Union of both sides so installed-only (EXTRA) skills are covered too;
    # sorted for deterministic report ordering.
    all_paths = sorted(set(repo_map) | set(installed_map))
    for skill_path in all_paths:
        in_repo = skill_path in repo_map
        in_installed = skill_path in installed_map

        if in_repo and not in_installed:
            report.drifts.append(
                SkillDrift(
                    skill_path=skill_path,
                    status="MISSING",
                    repo_hash=_sha256_file(repo_map[skill_path]),
                )
            )
        elif in_installed and not in_repo:
            report.drifts.append(
                SkillDrift(
                    skill_path=skill_path,
                    status="EXTRA",
                    installed_hash=_sha256_file(installed_map[skill_path]),
                )
            )
        else:
            # Present on both sides: compare truncated content hashes, and
            # attach a unified diff only when they disagree.
            rh = _sha256_file(repo_map[skill_path])
            ih = _sha256_file(installed_map[skill_path])
            if rh != ih:
                diff = _diff_skills(repo_map[skill_path], installed_map[skill_path])
                report.drifts.append(
                    SkillDrift(
                        skill_path=skill_path,
                        status="OUTDATED",
                        repo_hash=rh,
                        installed_hash=ih,
                        diff_lines=diff,
                    )
                )
            else:
                report.drifts.append(
                    SkillDrift(skill_path=skill_path, status="OK", repo_hash=rh, installed_hash=ih)
                )

    return report
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Fix: copy missing skills into installed location
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def apply_fix(report: AuditReport) -> None:
    """Sync drifted skills: copy MISSING and OUTDATED SKILL.md files repo -> installed."""
    if report.installed_root == Path("/not-found"):
        print("Cannot fix: installed skills directory not found.", file=sys.stderr)
        return

    source_root = report.repo_root / "skills"

    def _copy(drift, label: str, make_dirs: bool) -> None:
        # Copy one skill's SKILL.md from the repo into the installed tree.
        src = source_root / drift.skill_path / "SKILL.md"
        dst = report.installed_root / drift.skill_path / "SKILL.md"
        if make_dirs:
            dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dst)
        print(f"  {label}: {drift.skill_path}")

    for drift in report.by_status("MISSING"):
        _copy(drift, "Installed", make_dirs=True)

    for drift in report.by_status("OUTDATED"):
        _copy(drift, "Updated", make_dirs=False)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Rendering
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_GREEN = "\033[32m"
|
||||||
|
_RED = "\033[31m"
|
||||||
|
_YELLOW = "\033[33m"
|
||||||
|
_CYAN = "\033[36m"
|
||||||
|
_BOLD = "\033[1m"
|
||||||
|
_RESET = "\033[0m"
|
||||||
|
|
||||||
|
_STATUS_COLOR = {
|
||||||
|
"OK": _GREEN,
|
||||||
|
"MISSING": _RED,
|
||||||
|
"EXTRA": _YELLOW,
|
||||||
|
"OUTDATED": _CYAN,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _render_terminal(report: AuditReport, show_diff: bool = False) -> None:
    """Pretty-print the skills audit to the terminal with ANSI colors.

    Args:
        report: Audit result to render.
        show_diff: When True, prints up to 20 unified-diff lines under each
            OUTDATED skill.
    """
    print(f"\n{_BOLD}=== Wizard Skills Audit ==={_RESET}")
    print(f"  Repo skills: {report.repo_root / 'skills'}")
    print(f"  Installed skills: {report.installed_root}\n")

    if not report.drifts:
        print(f"{_GREEN}No skills found to compare.{_RESET}\n")
        return

    total = len(report.drifts)
    ok = len(report.by_status("OK"))
    missing = len(report.by_status("MISSING"))
    extra = len(report.by_status("EXTRA"))
    outdated = len(report.by_status("OUTDATED"))

    # Sort drifted entries first (False sorts before True), then by path.
    for drift in sorted(report.drifts, key=lambda d: (d.status == "OK", d.skill_path)):
        color = _STATUS_COLOR.get(drift.status, _RESET)
        print(f"  {color}{drift.status:8}{_RESET} {drift.skill_path}")
        if show_diff and drift.diff_lines:
            # Cap per-skill diff output at 20 lines to keep the view readable.
            for line in drift.diff_lines[:20]:
                print(f"    {line}")
            if len(drift.diff_lines) > 20:
                print(f"    ... ({len(drift.diff_lines) - 20} more lines)")

    print()
    print(f"  Total: {total}  OK: {_GREEN}{ok}{_RESET}  "
          f"Missing: {_RED}{missing}{_RESET}  "
          f"Extra: {_YELLOW}{extra}{_RESET}  "
          f"Outdated: {_CYAN}{outdated}{_RESET}")
    print()

    if not report.has_drift:
        print(f"{_GREEN}{_BOLD}No drift detected. Skills are in sync.{_RESET}\n")
    else:
        print(f"{_YELLOW}{_BOLD}Drift detected. Run with --fix to sync missing/outdated skills.{_RESET}\n")
|
||||||
|
|
||||||
|
|
||||||
|
def _render_json(report: AuditReport) -> None:
    """Emit the audit report as machine-readable JSON on stdout.

    Only non-OK entries appear under "drifts"; OK skills still contribute
    to the summary counts.
    """
    drift_entries = [
        {
            "skill_path": entry.skill_path,
            "status": entry.status,
            "repo_hash": entry.repo_hash,
            "installed_hash": entry.installed_hash,
            "diff_line_count": len(entry.diff_lines),
        }
        for entry in report.drifts
        if entry.status != "OK"
    ]
    payload = {
        "has_drift": report.has_drift,
        "repo_skills_dir": str(report.repo_root / "skills"),
        "installed_skills_dir": str(report.installed_root),
        "summary": {
            "total": len(report.drifts),
            "ok": len(report.by_status("OK")),
            "missing": len(report.by_status("MISSING")),
            "extra": len(report.by_status("EXTRA")),
            "outdated": len(report.by_status("OUTDATED")),
        },
        "drifts": drift_entries,
    }
    print(json.dumps(payload, indent=2))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI entry point
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: audit skills, optionally fix drift, and report.

    Exit status is 0 when no drift remains and 1 otherwise, so the script
    can gate CI or shell pipelines.
    """
    parser = argparse.ArgumentParser(
        description="Audit wizard skills for drift between repo and installed location."
    )
    parser.add_argument(
        "--repo-root",
        default=str(Path(__file__).parent.parent),
        help="Root of the hermes-agent repo (default: parent of this script)",
    )
    parser.add_argument(
        "--installed-root",
        default=None,
        help="Installed skills directory (default: auto-detect from HERMES_HOME)",
    )
    parser.add_argument(
        "--fix", action="store_true",
        help="Copy missing/outdated skills from repo to installed location",
    )
    parser.add_argument(
        "--diff", action="store_true",
        help="Show diff for outdated skills",
    )
    parser.add_argument(
        "--json", action="store_true",
        help="Output results as JSON",
    )
    args = parser.parse_args()

    repo_root = Path(args.repo_root).resolve()
    installed_root = Path(args.installed_root).resolve() if args.installed_root else None

    report = run_audit(repo_root, installed_root)

    if args.fix:
        apply_fix(report)
        # Re-audit so the rendered output reflects the post-fix state.
        report = run_audit(repo_root, installed_root)

    if args.json:
        _render_json(report)
    else:
        _render_terminal(report, show_diff=args.diff)

    sys.exit(1 if report.has_drift else 0)


if __name__ == "__main__":
    main()
|
||||||
378
wizard-bootstrap/wizard_bootstrap.py
Normal file
378
wizard-bootstrap/wizard_bootstrap.py
Normal file
@@ -0,0 +1,378 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
wizard_bootstrap.py — Wizard Environment Validator
|
||||||
|
|
||||||
|
Validates that a new wizard's forge environment is ready:
|
||||||
|
1. Python version check (>=3.11)
|
||||||
|
2. Core dependencies installed
|
||||||
|
3. Gitea authentication
|
||||||
|
4. Telegram connectivity
|
||||||
|
5. Smoke test (hermes import)
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python wizard-bootstrap/wizard_bootstrap.py
|
||||||
|
python wizard-bootstrap/wizard_bootstrap.py --fix
|
||||||
|
python wizard-bootstrap/wizard_bootstrap.py --json
|
||||||
|
|
||||||
|
Exits 0 if all checks pass, 1 if any check fails.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import importlib
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Result model
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass
class CheckResult:
    """Outcome of a single bootstrap environment check."""

    # Stable machine-readable identifier for the check (e.g. "gitea_auth").
    name: str
    # Whether the check succeeded.
    passed: bool
    # Human-readable one-line summary of the outcome.
    message: str
    # Suggested remediation, rendered only when the check fails.
    fix_hint: Optional[str] = None
    # Optional extra context rendered below the message.
    detail: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class BootstrapReport:
    """Aggregate of all bootstrap check results."""

    checks: list[CheckResult] = field(default_factory=list)

    @property
    def passed(self) -> bool:
        """True only when every recorded check passed."""
        for check in self.checks:
            if not check.passed:
                return False
        return True

    @property
    def failed(self) -> list[CheckResult]:
        """The subset of checks that did not pass."""
        return [check for check in self.checks if not check.passed]

    def add(self, result: CheckResult) -> None:
        """Record a single check result."""
        self.checks.append(result)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Individual checks
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def check_python_version() -> CheckResult:
    """Require Python >= 3.11 for the wizard environment."""
    version = sys.version_info
    # Only major/minor gate the check; micro is reported for context.
    meets_minimum = version[:2] >= (3, 11)
    return CheckResult(
        name="python_version",
        passed=meets_minimum,
        message=f"Python {version[0]}.{version[1]}.{version[2]}",
        fix_hint="Install Python 3.11+ via uv, pyenv, or your OS package manager.",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def check_core_deps() -> CheckResult:
    """Verify that hermes core Python packages are importable."""
    # These are import names, not PyPI names: 'python-dotenv' imports as
    # 'dotenv', 'PyYAML' as 'yaml'.
    required = (
        "openai",
        "anthropic",
        "dotenv",
        "yaml",
        "rich",
        "requests",
        "pydantic",
        "prompt_toolkit",
    )

    def _importable(module_name: str) -> bool:
        try:
            importlib.import_module(module_name)
        except ModuleNotFoundError:
            return False
        return True

    missing = [name for name in required if not _importable(name)]
    if not missing:
        return CheckResult(name="core_deps", passed=True, message="All core packages importable")
    return CheckResult(
        name="core_deps",
        passed=False,
        message=f"Missing packages: {', '.join(missing)}",
        fix_hint="Run: uv pip install -r requirements.txt (or: pip install -r requirements.txt)",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def check_hermes_importable() -> CheckResult:
    """Smoke-test: import hermes_constants (no side effects)."""
    # Make the repo root importable regardless of the current working dir.
    repo_root = str(Path(__file__).parent.parent)
    if repo_root not in sys.path:
        sys.path.insert(0, repo_root)

    try:
        import hermes_constants  # noqa: F401
    except Exception as exc:
        return CheckResult(
            name="hermes_smoke",
            passed=False,
            message=f"Import error: {exc}",
            fix_hint="Ensure you are in the hermes-agent repo root and your venv is active.",
        )
    return CheckResult(name="hermes_smoke", passed=True, message="hermes_constants imported OK")
|
||||||
|
|
||||||
|
|
||||||
|
def check_gitea_auth() -> CheckResult:
    """Verify a Gitea token env var is set and the forge API accepts it."""
    token = os.environ.get("GITEA_TOKEN") or os.environ.get("FORGE_TOKEN")
    if not token:
        return CheckResult(
            name="gitea_auth",
            passed=False,
            message="GITEA_TOKEN / FORGE_TOKEN not set",
            fix_hint="Export GITEA_TOKEN=<your-token> in your shell or ~/.hermes/.env",
        )

    # Lightweight probe: the repo-search endpoint responds quickly.
    forge_url = os.environ.get("FORGE_URL", "https://forge.alexanderwhitestone.com")
    try:
        import requests  # noqa: PLC0415

        response = requests.get(
            f"{forge_url}/api/v1/repos/search",
            headers={"Authorization": f"token {token}"},
            params={"limit": 1},
            timeout=10,
        )
    except Exception as exc:
        return CheckResult(
            name="gitea_auth",
            passed=False,
            message=f"Gitea API unreachable: {exc}",
            fix_hint="Check network connectivity and FORGE_URL env var.",
        )

    if response.status_code == 200:
        return CheckResult(name="gitea_auth", passed=True, message="Gitea API reachable and token valid")
    return CheckResult(
        name="gitea_auth",
        passed=False,
        message=f"Gitea API returned HTTP {response.status_code}",
        fix_hint="Check that your GITEA_TOKEN is correct and not expired.",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def check_telegram_connectivity() -> CheckResult:
    """Verify the Telegram bot token is set and the Bot API responds.

    Returns:
        A CheckResult named "telegram". On success the message includes the
        bot's username as reported by the getMe endpoint.
    """
    token = os.environ.get("TELEGRAM_BOT_TOKEN")
    if not token:
        return CheckResult(
            name="telegram",
            passed=False,
            message="TELEGRAM_BOT_TOKEN not set",
            fix_hint="Export TELEGRAM_BOT_TOKEN=<token> in your shell or ~/.hermes/.env",
        )

    try:
        import requests  # noqa: PLC0415

        resp = requests.get(
            f"https://api.telegram.org/bot{token}/getMe",
            timeout=10,
        )
        if resp.status_code == 200:
            data = resp.json()
            username = data.get("result", {}).get("username", "?")
            return CheckResult(
                name="telegram",
                passed=True,
                message=f"Telegram bot @{username} reachable",
            )
        return CheckResult(
            name="telegram",
            passed=False,
            message=f"Telegram API returned HTTP {resp.status_code}",
            fix_hint="Check that TELEGRAM_BOT_TOKEN is valid.",
        )
    except Exception as exc:
        # The Bot API URL embeds the bot token, and requests exceptions
        # include the request URL in their message — redact the token so the
        # secret never reaches terminal output, JSON reports, or logs.
        sanitized = str(exc).replace(token, "<redacted>")
        return CheckResult(
            name="telegram",
            passed=False,
            message=f"Telegram unreachable: {sanitized}",
            fix_hint="Check network connectivity.",
        )
|
||||||
|
|
||||||
|
|
||||||
|
def check_env_vars() -> CheckResult:
    """Check that at least one LLM provider key is configured."""
    provider_keys = (
        "OPENROUTER_API_KEY",
        "ANTHROPIC_API_KEY",
        "ANTHROPIC_TOKEN",
        "OPENAI_API_KEY",
        "GLM_API_KEY",
        "KIMI_API_KEY",
        "MINIMAX_API_KEY",
    )
    configured = [key for key in provider_keys if os.environ.get(key)]
    if not configured:
        return CheckResult(
            name="llm_provider",
            passed=False,
            message="No LLM provider API key found",
            fix_hint=(
                "Set at least one of: OPENROUTER_API_KEY, ANTHROPIC_API_KEY, OPENAI_API_KEY "
                "in ~/.hermes/.env or your shell."
            ),
        )
    return CheckResult(
        name="llm_provider",
        passed=True,
        message=f"LLM provider key(s) present: {', '.join(configured)}",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def check_hermes_home() -> CheckResult:
    """Verify the HERMES_HOME directory exists and is writable."""
    home_dir = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))

    # Guard clauses: existence first, then writability.
    if not home_dir.exists():
        return CheckResult(
            name="hermes_home",
            passed=False,
            message=f"HERMES_HOME does not exist: {home_dir}",
            fix_hint="Run 'hermes setup' or create the directory manually.",
        )
    if not os.access(home_dir, os.W_OK):
        return CheckResult(
            name="hermes_home",
            passed=False,
            message=f"HERMES_HOME not writable: {home_dir}",
            fix_hint=f"Fix permissions: chmod u+w {home_dir}",
        )
    return CheckResult(
        name="hermes_home",
        passed=True,
        message=f"HERMES_HOME OK: {home_dir}",
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Runner
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _load_dotenv_if_available() -> None:
    """Load ~/.hermes/.env so token checks work without manual export."""
    env_file = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / ".env"
    if not env_file.exists():
        return
    try:
        from dotenv import load_dotenv  # noqa: PLC0415

        # Existing environment wins; the file only fills in gaps.
        load_dotenv(env_file, override=False)
    except Exception:
        pass  # dotenv not installed yet — that's fine
|
||||||
|
|
||||||
|
|
||||||
|
def run_all_checks() -> BootstrapReport:
    """Run every environment check in order and collect the results.

    Loads ~/.hermes/.env first so token-based checks can see credentials
    without a manual export.
    """
    report = BootstrapReport()
    _load_dotenv_if_available()

    for check in (
        check_python_version,
        check_core_deps,
        check_hermes_importable,
        check_hermes_home,
        check_env_vars,
        check_gitea_auth,
        check_telegram_connectivity,
    ):
        report.add(check())

    return report
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Rendering
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# ANSI escape sequences for colorized terminal output.
_GREEN = "\033[32m"
_RED = "\033[31m"
_YELLOW = "\033[33m"
_BOLD = "\033[1m"
_RESET = "\033[0m"
|
||||||
|
|
||||||
|
|
||||||
|
def _render_terminal(report: BootstrapReport) -> None:
    """Render the bootstrap report to the terminal with ANSI colors."""
    print(f"\n{_BOLD}=== Wizard Bootstrap — Environment Check ==={_RESET}\n")

    for result in report.checks:
        mark = f"{_GREEN}✓{_RESET}" if result.passed else f"{_RED}✗{_RESET}"
        title = result.name.replace("_", " ").title()
        print(f" {mark} {_BOLD}{title}{_RESET}: {result.message}")
        # A remediation hint only makes sense for failures.
        if result.fix_hint and not result.passed:
            print(f" {_YELLOW}→ {result.fix_hint}{_RESET}")
        if result.detail:
            print(f" {result.detail}")

    print()
    total = len(report.checks)
    if report.passed:
        print(f"{_GREEN}{_BOLD}All {total} checks passed. Forge is ready.{_RESET}\n")
    else:
        failed_count = sum(1 for result in report.checks if not result.passed)
        print(
            f"{_RED}{_BOLD}{failed_count}/{total} check(s) failed.{_RESET} "
            f"Resolve the issues above before going online.\n"
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _render_json(report: BootstrapReport) -> None:
    """Emit the bootstrap report as machine-readable JSON on stdout."""
    check_payloads = [
        {
            "name": result.name,
            "passed": result.passed,
            "message": result.message,
            "fix_hint": result.fix_hint,
            "detail": result.detail,
        }
        for result in report.checks
    ]
    pass_count = sum(1 for result in report.checks if result.passed)
    payload = {
        "passed": report.passed,
        "summary": {
            "total": len(report.checks),
            "passed": pass_count,
            "failed": len(report.checks) - pass_count,
        },
        "checks": check_payloads,
    }
    print(json.dumps(payload, indent=2))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI entry point
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: run all checks and exit 0 on success, 1 on failure."""
    parser = argparse.ArgumentParser(description="Validate the forge wizard environment.")
    parser.add_argument("--json", action="store_true", help="Output results as JSON")
    args = parser.parse_args()

    report = run_all_checks()

    renderer = _render_json if args.json else _render_terminal
    renderer(report)

    sys.exit(0 if report.passed else 1)


if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user