Compare commits
1 Commits
bezalel/no
...
claude/iss
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c9c3fc94f8 |
@@ -1,44 +0,0 @@
|
||||
name: Notebook CI
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- 'notebooks/**'
|
||||
pull_request:
|
||||
paths:
|
||||
- 'notebooks/**'
|
||||
|
||||
jobs:
|
||||
notebook-smoke:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install papermill jupytext nbformat
|
||||
python -m ipykernel install --user --name python3
|
||||
|
||||
- name: Execute system health notebook
|
||||
run: |
|
||||
papermill notebooks/agent_task_system_health.ipynb /tmp/output.ipynb \
|
||||
-p threshold 0.5 \
|
||||
-p hostname ci-runner
|
||||
|
||||
- name: Verify output has results
|
||||
run: |
|
||||
python -c "
|
||||
import json
|
||||
nb = json.load(open('/tmp/output.ipynb'))
|
||||
code_cells = [c for c in nb['cells'] if c['cell_type'] == 'code']
|
||||
outputs = [c.get('outputs', []) for c in code_cells]
|
||||
total_outputs = sum(len(o) for o in outputs)
|
||||
assert total_outputs > 0, 'Notebook produced no outputs'
|
||||
print(f'Notebook executed successfully with {total_outputs} output(s)')
|
||||
"
|
||||
13
.github/CODEOWNERS
vendored
Normal file
13
.github/CODEOWNERS
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
# Default owners for all files
|
||||
* @Timmy
|
||||
|
||||
# Critical paths require explicit review
|
||||
/gateway/ @Timmy
|
||||
/tools/ @Timmy
|
||||
/agent/ @Timmy
|
||||
/config/ @Timmy
|
||||
/scripts/ @Timmy
|
||||
/.github/workflows/ @Timmy
|
||||
/pyproject.toml @Timmy
|
||||
/requirements.txt @Timmy
|
||||
/Dockerfile @Timmy
|
||||
99
.github/ISSUE_TEMPLATE/security_pr_checklist.yml
vendored
Normal file
99
.github/ISSUE_TEMPLATE/security_pr_checklist.yml
vendored
Normal file
@@ -0,0 +1,99 @@
|
||||
name: "🔒 Security PR Checklist"
|
||||
description: "Use this when your PR touches authentication, file I/O, external API calls, or other sensitive paths."
|
||||
title: "[Security Review]: "
|
||||
labels: ["security", "needs-review"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Security Pre-Merge Review
|
||||
Complete this checklist before requesting review on PRs that touch **authentication, file I/O, external API calls, or secrets handling**.
|
||||
|
||||
- type: input
|
||||
id: pr-link
|
||||
attributes:
|
||||
label: Pull Request
|
||||
description: Link to the PR being reviewed
|
||||
placeholder: "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent/pulls/XXX"
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: change-type
|
||||
attributes:
|
||||
label: Change Category
|
||||
description: What kind of sensitive change does this PR make?
|
||||
multiple: true
|
||||
options:
|
||||
- Authentication / Authorization
|
||||
- File I/O (read/write/delete)
|
||||
- External API calls (outbound HTTP/network)
|
||||
- Secret / credential handling
|
||||
- Command execution (subprocess/shell)
|
||||
- Dependency addition or update
|
||||
- Configuration changes
|
||||
- CI/CD pipeline changes
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: checkboxes
|
||||
id: secrets-checklist
|
||||
attributes:
|
||||
label: Secrets & Credentials
|
||||
options:
|
||||
- label: No secrets, API keys, or credentials are hardcoded
|
||||
required: true
|
||||
- label: All sensitive values are loaded from environment variables or a secrets manager
|
||||
required: true
|
||||
- label: Test fixtures use fake/placeholder values, not real credentials
|
||||
required: true
|
||||
|
||||
- type: checkboxes
|
||||
id: input-validation-checklist
|
||||
attributes:
|
||||
label: Input Validation
|
||||
options:
|
||||
- label: All external input (user, API, file) is validated before use
|
||||
required: true
|
||||
- label: File paths are validated against path traversal (`../`, null bytes, absolute paths)
|
||||
- label: URLs are validated for SSRF (blocked private/metadata IPs)
|
||||
- label: Shell commands do not use `shell=True` with user-controlled input
|
||||
|
||||
- type: checkboxes
|
||||
id: auth-checklist
|
||||
attributes:
|
||||
label: Authentication & Authorization (if applicable)
|
||||
options:
|
||||
- label: Authentication tokens are not logged or exposed in error messages
|
||||
- label: Authorization checks happen server-side, not just client-side
|
||||
- label: Session tokens are properly scoped and have expiry
|
||||
|
||||
- type: checkboxes
|
||||
id: supply-chain-checklist
|
||||
attributes:
|
||||
label: Supply Chain
|
||||
options:
|
||||
- label: New dependencies are pinned to a specific version range
|
||||
- label: Dependencies come from trusted sources (PyPI, npm, official repos)
|
||||
- label: No `.pth` files or install hooks that execute arbitrary code
|
||||
- label: "`pip-audit` passes (no known CVEs in added dependencies)"
|
||||
|
||||
- type: textarea
|
||||
id: threat-model
|
||||
attributes:
|
||||
label: Threat Model Notes
|
||||
description: |
|
||||
Briefly describe the attack surface this change introduces or modifies, and how it is mitigated.
|
||||
placeholder: |
|
||||
This PR adds a new outbound HTTP call to the OpenRouter API.
|
||||
Mitigation: URL is hardcoded (no user input), response is parsed with strict schema validation.
|
||||
|
||||
- type: textarea
|
||||
id: testing
|
||||
attributes:
|
||||
label: Security Testing Done
|
||||
description: What security testing did you perform?
|
||||
placeholder: |
|
||||
- Ran validate_security.py — all checks pass
|
||||
- Tested path traversal attempts manually
|
||||
- Verified no secrets in git diff
|
||||
82
.github/workflows/dependency-audit.yml
vendored
Normal file
82
.github/workflows/dependency-audit.yml
vendored
Normal file
@@ -0,0 +1,82 @@
|
||||
name: Dependency Audit
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'requirements.txt'
|
||||
- 'pyproject.toml'
|
||||
- 'uv.lock'
|
||||
schedule:
|
||||
- cron: '0 8 * * 1' # Weekly, every Monday at 08:00 UTC
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
audit:
|
||||
name: Audit Python dependencies
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: astral-sh/setup-uv@v5
|
||||
- name: Set up Python
|
||||
run: uv python install 3.11
|
||||
- name: Install pip-audit
|
||||
run: uv pip install --system pip-audit
|
||||
- name: Run pip-audit
|
||||
id: audit
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Audit requirements.txt only; pyproject.toml and uv.lock changes trigger this workflow but are not themselves audited
|
||||
if pip-audit --requirement requirements.txt -f json -o /tmp/audit-results.json 2>/tmp/audit-stderr.txt; then
|
||||
echo "found=false" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "found=true" >> "$GITHUB_OUTPUT"
|
||||
# Check severity
|
||||
CRITICAL=$(python3 -c "
|
||||
import json, sys
|
||||
data = json.load(open('/tmp/audit-results.json'))
|
||||
vulns = data.get('dependencies', [])
|
||||
for d in vulns:
|
||||
for v in d.get('vulns', []):
|
||||
aliases = v.get('aliases', [])
|
||||
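# NOTE(review): aliases appear to hold alternate advisory IDs (e.g. CVE-/GHSA- identifiers), so the CVSS substring test below probably never matches; confirm against the pip-audit JSON format and replace with a real severity lookup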
|
||||
if any('CVSS' in str(a) for a in aliases):
|
||||
print('true')
|
||||
sys.exit(0)
|
||||
print('false')
|
||||
" 2>/dev/null || echo 'false')
|
||||
echo "critical=${CRITICAL}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
continue-on-error: true
|
||||
- name: Post results comment
|
||||
if: steps.audit.outputs.found == 'true' && github.event_name == 'pull_request'
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
BODY="## ⚠️ Dependency Vulnerabilities Detected
|
||||
|
||||
\`pip-audit\` found vulnerable dependencies in this PR. Review and update before merging.
|
||||
|
||||
\`\`\`
|
||||
$(cat /tmp/audit-results.json | python3 -c "
|
||||
import json, sys
|
||||
data = json.load(sys.stdin)
|
||||
for dep in data.get('dependencies', []):
|
||||
for v in dep.get('vulns', []):
|
||||
print(f\" {dep['name']}=={dep['version']}: {v['id']} - {v.get('description', '')[:120]}\")
|
||||
" 2>/dev/null || cat /tmp/audit-stderr.txt)
|
||||
\`\`\`
|
||||
|
||||
---
|
||||
*Automated scan by [dependency-audit](/.github/workflows/dependency-audit.yml)*"
|
||||
gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY"
|
||||
- name: Fail on vulnerabilities
|
||||
if: steps.audit.outputs.found == 'true'
|
||||
run: |
|
||||
echo "::error::Vulnerable dependencies detected. See PR comment for details."
|
||||
cat /tmp/audit-results.json | python3 -m json.tool || true
|
||||
exit 1
|
||||
114
.github/workflows/quarterly-security-audit.yml
vendored
Normal file
114
.github/workflows/quarterly-security-audit.yml
vendored
Normal file
@@ -0,0 +1,114 @@
|
||||
name: Quarterly Security Audit
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Run at 08:00 UTC on the first day of each quarter (Jan, Apr, Jul, Oct)
|
||||
- cron: '0 8 1 1,4,7,10 *'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
reason:
|
||||
description: 'Reason for manual trigger'
|
||||
required: false
|
||||
default: 'Manual quarterly audit'
|
||||
|
||||
permissions:
|
||||
issues: write
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
create-audit-issue:
|
||||
name: Create quarterly security audit issue
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Get quarter info
|
||||
id: quarter
|
||||
run: |
|
||||
MONTH=$(date +%-m)
|
||||
YEAR=$(date +%Y)
|
||||
QUARTER=$(( (MONTH - 1) / 3 + 1 ))
|
||||
echo "quarter=Q${QUARTER}-${YEAR}" >> "$GITHUB_OUTPUT"
|
||||
echo "year=${YEAR}" >> "$GITHUB_OUTPUT"
|
||||
echo "q=${QUARTER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Create audit issue
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
QUARTER="${{ steps.quarter.outputs.quarter }}"
|
||||
|
||||
gh issue create \
|
||||
--title "[$QUARTER] Quarterly Security Audit" \
|
||||
--label "security,audit" \
|
||||
--body "$(cat <<'BODY'
|
||||
## Quarterly Security Audit — ${{ steps.quarter.outputs.quarter }}
|
||||
|
||||
This is the scheduled quarterly security audit for the hermes-agent project. Complete each section and close this issue when the audit is done.
|
||||
|
||||
**Audit Period:** ${{ steps.quarter.outputs.quarter }}
|
||||
**Due:** End of quarter
|
||||
**Owner:** Assign to a maintainer
|
||||
|
||||
---
|
||||
|
||||
## 1. Open Issues & PRs Audit
|
||||
|
||||
Review all open issues and PRs for security-relevant content. Tag any that touch attack surfaces with the `security` label.
|
||||
|
||||
- [ ] Review open issues older than 30 days for unaddressed security concerns
|
||||
- [ ] Tag security-relevant open PRs with `needs-security-review`
|
||||
- [ ] Check for any issues referencing CVEs or known vulnerabilities
|
||||
- [ ] Review recently closed security issues — are fixes deployed?
|
||||
|
||||
## 2. Dependency Audit
|
||||
|
||||
- [ ] Run `pip-audit` against current `requirements.txt` / `pyproject.toml`
|
||||
- [ ] Check `uv.lock` for any pinned versions with known CVEs
|
||||
- [ ] Review any `git+` dependencies for recent changes or compromise signals
|
||||
- [ ] Update vulnerable dependencies and open PRs for each
|
||||
|
||||
## 3. Critical Path Review
|
||||
|
||||
Review recent changes to attack-surface paths:
|
||||
|
||||
- [ ] `gateway/` — authentication, message routing, platform adapters
|
||||
- [ ] `tools/` — file I/O, command execution, web access
|
||||
- [ ] `agent/` — prompt handling, context management
|
||||
- [ ] `config/` — secrets loading, configuration parsing
|
||||
- [ ] `.github/workflows/` — CI/CD integrity
|
||||
|
||||
Run: `git log --since="3 months ago" --name-only -- gateway/ tools/ agent/ config/ .github/workflows/`
|
||||
|
||||
## 4. Secret Scan
|
||||
|
||||
- [ ] Run secret scanner on the full codebase (not just diffs)
|
||||
- [ ] Verify no credentials are present in git history
|
||||
- [ ] Confirm all API keys/tokens in use are rotated on a regular schedule
|
||||
|
||||
## 5. Access & Permissions Review
|
||||
|
||||
- [ ] Review who has write access to the main branch
|
||||
- [ ] Confirm branch protection rules are still in place (require PR + review)
|
||||
- [ ] Verify CI/CD secrets are scoped correctly (not over-permissioned)
|
||||
- [ ] Review CODEOWNERS file for accuracy
|
||||
|
||||
## 6. Vulnerability Triage
|
||||
|
||||
List any new vulnerabilities found this quarter:
|
||||
|
||||
| ID | Component | Severity | Status | Owner |
|
||||
|----|-----------|----------|--------|-------|
|
||||
| | | | | |
|
||||
|
||||
## 7. Action Items
|
||||
|
||||
| Action | Owner | Due Date | Status |
|
||||
|--------|-------|----------|--------|
|
||||
| | | | |
|
||||
|
||||
---
|
||||
|
||||
*Auto-generated by [quarterly-security-audit](/.github/workflows/quarterly-security-audit.yml). Close this issue when the audit is complete.*
|
||||
BODY
|
||||
)"
|
||||
136
.github/workflows/secret-scan.yml
vendored
Normal file
136
.github/workflows/secret-scan.yml
vendored
Normal file
@@ -0,0 +1,136 @@
|
||||
name: Secret Scan
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
|
||||
permissions:
|
||||
pull-requests: write
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
scan:
|
||||
name: Scan for secrets
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Fetch base branch
|
||||
run: git fetch origin ${{ github.base_ref }}
|
||||
|
||||
- name: Scan diff for secrets
|
||||
id: scan
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Get only added lines from the diff (exclude deletions and context lines)
|
||||
DIFF=$(git diff "origin/${{ github.base_ref }}"...HEAD -- \
|
||||
':!*.lock' ':!uv.lock' ':!package-lock.json' ':!yarn.lock' \
|
||||
| grep '^+' | grep -v '^+++' || true)
|
||||
|
||||
FINDINGS=""
|
||||
CRITICAL=false
|
||||
|
||||
# check LABEL PATTERN [CRITICAL]
#
# Search the added diff lines held in $DIFF for the PCRE PATTERN. On a match,
# append a bullet naming LABEL to the global $FINDINGS (only the label is
# recorded, never the matched text) and, when the third argument is "true",
# set the global CRITICAL flag. CRITICAL defaults to "false" when omitted.
check() {
  local label="$1"
  local pattern="$2"
  local critical="${3:-false}"
  local matches
  # Pass the pattern with -e: patterns such as '-----BEGIN ...' start with a
  # dash and would otherwise be parsed by grep as an option, making grep fail
  # while the `|| true` silently turns that failure into "no match".
  matches=$(printf '%s\n' "$DIFF" | grep -oP -e "$pattern" || true)
  if [ -n "$matches" ]; then
    # The literal \n is expanded later, when FINDINGS is written with printf "%b".
    FINDINGS="${FINDINGS}\n- **${label}**: pattern matched"
    if [ "$critical" = "true" ]; then
      CRITICAL=true
    fi
  fi
}
|
||||
|
||||
# AWS keys — critical
|
||||
check "AWS Access Key" 'AKIA[0-9A-Z]{16}' true
|
||||
|
||||
# Private key headers — critical
|
||||
check "Private Key Header" '-----BEGIN (RSA|EC|DSA|OPENSSH|PGP) PRIVATE KEY' true
|
||||
|
||||
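# OpenAI-style keys: "sk-" followed by 20+ alphanumerics. NOTE(review): hyphenated formats (e.g. sk-ant-..., sk-proj-...) are not matched by this pattern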
|
||||
check "OpenAI-style API key (sk-)" 'sk-[a-zA-Z0-9]{20,}' false
|
||||
|
||||
# GitHub tokens
|
||||
check "GitHub personal access token (ghp_)" 'ghp_[a-zA-Z0-9]{36}' true
|
||||
check "GitHub fine-grained PAT (github_pat_)" 'github_pat_[a-zA-Z0-9_]{1,}' true
|
||||
|
||||
# Slack tokens
|
||||
check "Slack bot token (xoxb-)" 'xoxb-[0-9A-Za-z\-]{10,}' true
|
||||
check "Slack user token (xoxp-)" 'xoxp-[0-9A-Za-z\-]{10,}' true
|
||||
|
||||
# Generic assignment patterns — exclude obvious placeholders
|
||||
GENERIC=$(echo "$DIFF" | grep -iP '(api_key|apikey|api-key|secret_key|access_token|auth_token)\s*[=:]\s*['"'"'"][^'"'"'"]{20,}['"'"'"]' \
|
||||
| grep -ivP '(fake|mock|test|placeholder|example|dummy|your[_-]|xxx|<|>|\{\{)' || true)
|
||||
if [ -n "$GENERIC" ]; then
|
||||
FINDINGS="${FINDINGS}\n- **Generic credential assignment**: possible hardcoded secret"
|
||||
fi
|
||||
|
||||
# .env additions with long values
# Collect only the added lines (leading '+', excluding the '+++' file header)
# from .env-style files changed relative to the PR base branch.
ENV_DIFF=$(git diff "origin/${{ github.base_ref }}"...HEAD -- '*.env' '**/.env' '.env*' \
  | grep '^+' | grep -v '^+++' || true)
# Every line kept in ENV_DIFF still carries the diff's leading '+', so the
# pattern has to consume it; anchoring the variable name directly at '^'
# can never match. Digits are accepted after the first character of the name
# so that variables such as S3_SECRET are covered too.
ENV_MATCHES=$(echo "$ENV_DIFF" | grep -P '^\+[A-Z_][A-Z0-9_]*=.{16,}' \
  | grep -ivP '(fake|mock|test|placeholder|example|dummy|your[_-]|xxx)' || true)
if [ -n "$ENV_MATCHES" ]; then
  FINDINGS="${FINDINGS}\n- **.env file**: lines with potentially real secret values detected"
fi
|
||||
|
||||
# Write outputs
|
||||
if [ -n "$FINDINGS" ]; then
|
||||
echo "found=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "found=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
if [ "$CRITICAL" = "true" ]; then
|
||||
echo "critical=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "critical=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# Store findings in a file to use in comment step
|
||||
printf "%b" "$FINDINGS" > /tmp/secret-findings.txt
|
||||
|
||||
- name: Post PR comment with findings
|
||||
if: steps.scan.outputs.found == 'true' && github.event_name == 'pull_request'
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
FINDINGS=$(cat /tmp/secret-findings.txt)
|
||||
SEVERITY="warning"
|
||||
if [ "${{ steps.scan.outputs.critical }}" = "true" ]; then
|
||||
SEVERITY="CRITICAL"
|
||||
fi
|
||||
|
||||
BODY="## Secret Scan — ${SEVERITY} findings
|
||||
|
||||
The automated secret scanner detected potential secrets in the diff for this PR.
|
||||
|
||||
### Findings
|
||||
${FINDINGS}
|
||||
|
||||
### What to do
|
||||
1. Remove any real credentials from the diff immediately.
|
||||
2. If the match is a false positive (test fixture, placeholder), add a comment explaining why or rename the variable to include \`fake\`, \`mock\`, or \`test\`.
|
||||
3. Rotate any exposed credentials regardless of whether this PR is merged.
|
||||
|
||||
---
|
||||
*Automated scan by [secret-scan](/.github/workflows/secret-scan.yml)*"
|
||||
|
||||
gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY"
|
||||
|
||||
- name: Fail on critical secrets
|
||||
if: steps.scan.outputs.critical == 'true'
|
||||
run: |
|
||||
echo "::error::Critical secrets detected in diff (private keys, AWS keys, or GitHub tokens). Remove them before merging."
|
||||
exit 1
|
||||
|
||||
- name: Warn on non-critical findings
|
||||
if: steps.scan.outputs.found == 'true' && steps.scan.outputs.critical == 'false'
|
||||
run: |
|
||||
echo "::warning::Potential secrets detected in diff. Review the PR comment for details."
|
||||
25
.pre-commit-config.yaml
Normal file
25
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,25 @@
|
||||
repos:
|
||||
# Secret detection
|
||||
- repo: https://github.com/gitleaks/gitleaks
|
||||
rev: v8.21.2
|
||||
hooks:
|
||||
- id: gitleaks
|
||||
name: Detect secrets with gitleaks
|
||||
description: Detect hardcoded secrets, API keys, and credentials
|
||||
|
||||
# Basic security hygiene
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: check-added-large-files
|
||||
args: ['--maxkb=500']
|
||||
- id: detect-private-key
|
||||
name: Detect private keys
|
||||
- id: check-merge-conflict
|
||||
- id: check-yaml
|
||||
- id: check-toml
|
||||
- id: end-of-file-fixer
|
||||
- id: trailing-whitespace
|
||||
args: ['--markdown-linebreak-ext=md']
|
||||
- id: no-commit-to-branch
|
||||
args: ['--branch', 'main']
|
||||
@@ -1,57 +0,0 @@
|
||||
# Notebook Workflow for Agent Tasks
|
||||
|
||||
This directory demonstrates a sovereign, version-controlled workflow for LLM agent tasks using Jupyter notebooks.
|
||||
|
||||
## Philosophy
|
||||
|
||||
- **`.py` files are the source of truth** — authored and reviewed as plain Python with `# %%` cell markers (via Jupytext)
|
||||
- **`.ipynb` files are generated artifacts** — auto-created from `.py` for execution and rich viewing
|
||||
- **Papermill parameterizes and executes** — each run produces an output notebook with code, narrative, and results preserved
|
||||
- **Output notebooks are audit artifacts** — every execution leaves a permanent, replayable record
|
||||
|
||||
## File Layout
|
||||
|
||||
```
|
||||
notebooks/
|
||||
agent_task_system_health.py # Source of truth (Jupytext)
|
||||
agent_task_system_health.ipynb # Generated from .py
|
||||
docs/
|
||||
NOTEBOOK_WORKFLOW.md # This document
|
||||
.gitea/workflows/
|
||||
notebook-ci.yml # CI gate: executes notebooks on PR/push
|
||||
```
|
||||
|
||||
## How Agents Work With Notebooks
|
||||
|
||||
1. **Create** — Agent generates a `.py` notebook using `# %% [markdown]` and `# %%` code blocks
|
||||
2. **Review** — PR reviewers see clean diffs in Gitea (no JSON noise)
|
||||
3. **Generate** — `jupytext --to ipynb` produces the `.ipynb` before merge
|
||||
4. **Execute** — Papermill runs the notebook with injected parameters
|
||||
5. **Archive** — Output notebook is committed to a `reports/` branch or artifact store
|
||||
|
||||
## Converting Between Formats
|
||||
|
||||
```bash
|
||||
# .py -> .ipynb
|
||||
jupytext --to ipynb notebooks/agent_task_system_health.py
|
||||
|
||||
# .ipynb -> .py
|
||||
jupytext --to py notebooks/agent_task_system_health.ipynb
|
||||
|
||||
# Execute with parameters
|
||||
papermill notebooks/agent_task_system_health.ipynb output.ipynb \
|
||||
-p threshold 1.0 -p hostname forge-vps-01
|
||||
```
|
||||
|
||||
## CI Gate
|
||||
|
||||
The `notebook-ci.yml` workflow executes `notebooks/agent_task_system_health.ipynb` with Papermill on every push and pull request that touches `notebooks/`, and fails if the executed notebook contains no cell outputs.
|
||||
|
||||
## Why This Matters
|
||||
|
||||
| Problem | Notebook Solution |
|
||||
|---|---|
|
||||
| Ephemeral agent reasoning | Markdown cells narrate the thought process |
|
||||
| Stateless single-turn tools | Stateful cells persist variables across steps |
|
||||
| Unreviewable binary artifacts | `.py` source is diffable and PR-friendly |
|
||||
| No execution audit trail | Output notebook preserves code + outputs + metadata |
|
||||
@@ -1,57 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Parameterized Agent Task: System Health Check\n",
|
||||
"\n",
|
||||
"This notebook demonstrates how an LLM agent can generate a task notebook,\n",
|
||||
"a scheduler can parameterize and execute it via papermill,\n",
|
||||
"and the output becomes a persistent audit artifact."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {"tags": ["parameters"]},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Default parameters — papermill will inject overrides here\n",
|
||||
"threshold = 1.0\n",
|
||||
"hostname = \"localhost\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json, subprocess, datetime\n",
|
||||
"gather_time = datetime.datetime.now().isoformat()\n",
|
||||
"load_avg = subprocess.check_output([\"cat\", \"/proc/loadavg\"]).decode().strip()\n",
|
||||
"load_values = [float(x) for x in load_avg.split()[:3]]\n",
|
||||
"avg_load = sum(load_values) / len(load_values)\n",
|
||||
"intervention_needed = avg_load > threshold\n",
|
||||
"report = {\n",
|
||||
" \"hostname\": hostname,\n",
|
||||
" \"threshold\": threshold,\n",
|
||||
" \"avg_load\": round(avg_load, 3),\n",
|
||||
" \"intervention_needed\": intervention_needed,\n",
|
||||
" \"gathered_at\": gather_time\n",
|
||||
"}\n",
|
||||
"print(json.dumps(report, indent=2))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,41 +0,0 @@
|
||||
# ---
|
||||
# jupyter:
|
||||
# jupytext:
|
||||
# text_representation:
|
||||
# extension: .py
|
||||
# format_name: percent
|
||||
# format_version: '1.3'
|
||||
# jupytext_version: 1.19.1
|
||||
# kernelspec:
|
||||
# display_name: Python 3
|
||||
# language: python
|
||||
# name: python3
|
||||
# ---
|
||||
|
||||
# %% [markdown]
|
||||
# # Parameterized Agent Task: System Health Check
|
||||
#
|
||||
# This notebook demonstrates how an LLM agent can generate a task notebook,
|
||||
# a scheduler can parameterize and execute it via papermill,
|
||||
# and the output becomes a persistent audit artifact.
|
||||
|
||||
# %% tags=["parameters"]
# Default parameters — papermill will inject overrides here
threshold = 1.0
hostname = "localhost"

# %%
# Gather the host's load averages, compare their mean against `threshold`,
# and print a JSON health report.
import datetime
import json
from pathlib import Path

# Timezone-aware UTC timestamp, so the recorded time is unambiguous.
gather_time = datetime.datetime.now(datetime.timezone.utc).isoformat()

# Read /proc/loadavg directly instead of spawning `cat`. Only the first three
# whitespace-separated fields are used (on Linux these are the 1-, 5- and
# 15-minute load averages).
load_avg = Path("/proc/loadavg").read_text(encoding="utf-8").strip()
load_values = [float(x) for x in load_avg.split()[:3]]
avg_load = sum(load_values) / len(load_values)

# Flag the host when the mean of the three load figures exceeds the threshold.
intervention_needed = avg_load > threshold

report = {
    "hostname": hostname,
    "threshold": threshold,
    "avg_load": round(avg_load, 3),
    "intervention_needed": intervention_needed,
    "gathered_at": gather_time,
}
print(json.dumps(report, indent=2))
|
||||
Reference in New Issue
Block a user