diff --git a/.gitea/workflows/notebook-ci.yml b/.gitea/workflows/notebook-ci.yml new file mode 100644 index 000000000..d3794b414 --- /dev/null +++ b/.gitea/workflows/notebook-ci.yml @@ -0,0 +1,45 @@ +name: Notebook CI + +on: + push: + paths: + - 'notebooks/**' + pull_request: + paths: + - 'notebooks/**' + +jobs: + notebook-smoke: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install dependencies + run: | + # papermill does not pull in ipykernel; install it explicitly so the python3 kernelspec can be registered + pip install papermill jupytext nbformat ipykernel + python -m ipykernel install --user --name python3 + + - name: Execute system health notebook + run: | + papermill notebooks/agent_task_system_health.ipynb /tmp/output.ipynb \ + -p threshold 0.5 \ + -p hostname ci-runner + + - name: Verify output has results + run: | + python -c " + import json + nb = json.load(open('/tmp/output.ipynb')) + code_cells = [c for c in nb['cells'] if c['cell_type'] == 'code'] + outputs = [c.get('outputs', []) for c in code_cells] + total_outputs = sum(len(o) for o in outputs) + assert total_outputs > 0, 'Notebook produced no outputs' + print(f'Notebook executed successfully with {total_outputs} output(s)') + " diff --git a/docs/NOTEBOOK_WORKFLOW.md b/docs/NOTEBOOK_WORKFLOW.md new file mode 100644 index 000000000..20c660cc6 --- /dev/null +++ b/docs/NOTEBOOK_WORKFLOW.md @@ -0,0 +1,57 @@ +# Notebook Workflow for Agent Tasks + +This directory demonstrates a sovereign, version-controlled workflow for LLM agent tasks using Jupyter notebooks. 
+ +## Philosophy + +- **`.py` files are the source of truth** — authored and reviewed as plain Python with `# %%` cell markers (via Jupytext) +- **`.ipynb` files are generated artifacts** — auto-created from `.py` for execution and rich viewing +- **Papermill parameterizes and executes** — each run produces an output notebook with code, narrative, and results preserved +- **Output notebooks are audit artifacts** — every execution leaves a permanent, replayable record + +## File Layout + +``` +notebooks/ + agent_task_system_health.py # Source of truth (Jupytext) + agent_task_system_health.ipynb # Generated from .py +docs/ + NOTEBOOK_WORKFLOW.md # This document +.gitea/workflows/ + notebook-ci.yml # CI gate: executes notebooks on PR/push +``` + +## How Agents Work With Notebooks + +1. **Create** — Agent generates a `.py` notebook using `# %% [markdown]` and `# %%` code blocks +2. **Review** — PR reviewers see clean diffs in Gitea (no JSON noise) +3. **Generate** — `jupytext --to ipynb` produces the `.ipynb` before merge +4. **Execute** — Papermill runs the notebook with injected parameters +5. **Archive** — Output notebook is committed to a `reports/` branch or artifact store + +## Converting Between Formats + +```bash +# .py -> .ipynb +jupytext --to ipynb notebooks/agent_task_system_health.py + +# .ipynb -> .py (percent format, i.e. `# %%` cell markers) +jupytext --to py:percent notebooks/agent_task_system_health.ipynb + +# Execute with parameters +papermill notebooks/agent_task_system_health.ipynb output.ipynb \ + -p threshold 1.0 -p hostname forge-vps-01 +``` + +## CI Gate + +The `notebook-ci.yml` workflow runs on every push and pull request that touches `notebooks/`. It executes `notebooks/agent_task_system_health.ipynb` with Papermill and fails if the run produces no cell outputs, ensuring that the checked-in notebook still runs. 
+ +## Why This Matters + +| Problem | Notebook Solution | +|---|---| +| Ephemeral agent reasoning | Markdown cells narrate the thought process | +| Stateless single-turn tools | Stateful cells persist variables across steps | +| Unreviewable binary artifacts | `.py` source is diffable and PR-friendly | +| No execution audit trail | Output notebook preserves code + outputs + metadata | diff --git a/notebooks/agent_task_system_health.ipynb b/notebooks/agent_task_system_health.ipynb new file mode 100644 index 000000000..ab4815018 --- /dev/null +++ b/notebooks/agent_task_system_health.ipynb @@ -0,0 +1,57 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Parameterized Agent Task: System Health Check\n", + "\n", + "This notebook demonstrates how an LLM agent can generate a task notebook,\n", + "a scheduler can parameterize and execute it via papermill,\n", + "and the output becomes a persistent audit artifact." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {"tags": ["parameters"]}, + "outputs": [], + "source": [ + "# Default parameters — papermill will inject overrides here\n", + "threshold = 1.0\n", + "hostname = \"localhost\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json, os, datetime\n", + "gather_time = datetime.datetime.now(datetime.timezone.utc).isoformat()  # timezone-aware (UTC) so audit records are unambiguous\n", + "# os.getloadavg() returns the 1-, 5- and 15-minute load averages without spawning a subprocess\n", + "load_values = os.getloadavg()\n", + "avg_load = sum(load_values) / len(load_values)\n", + "intervention_needed = avg_load > threshold\n", + "report = {\n", + " \"hostname\": hostname,\n", + " \"threshold\": threshold,\n", + " \"avg_load\": round(avg_load, 3),\n", + " \"intervention_needed\": intervention_needed,\n", + " \"gathered_at\": gather_time\n", + "}\n", + "print(json.dumps(report, indent=2))" + ] + } + ], + "metadata": { + "kernelspec": { + 
"display_name": "Python 3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/agent_task_system_health.py b/notebooks/agent_task_system_health.py new file mode 100644 index 000000000..6b9ef9049 --- /dev/null +++ b/notebooks/agent_task_system_health.py @@ -0,0 +1,41 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.19.1 +# kernelspec: +# display_name: Python 3 +# language: python +# name: python3 +# --- + +# %% [markdown] +# # Parameterized Agent Task: System Health Check +# +# This notebook demonstrates how an LLM agent can generate a task notebook, +# a scheduler can parameterize and execute it via papermill, +# and the output becomes a persistent audit artifact. + +# %% tags=["parameters"] +# Default parameters — papermill will inject overrides here +threshold = 1.0 +hostname = "localhost" + +# %% +import json, os, datetime +gather_time = datetime.datetime.now(datetime.timezone.utc).isoformat()  # timezone-aware (UTC) so audit records are unambiguous +# os.getloadavg() returns the 1-, 5- and 15-minute load averages without spawning a subprocess +load_values = os.getloadavg() +avg_load = sum(load_values) / len(load_values) +intervention_needed = avg_load > threshold +report = { + "hostname": hostname, + "threshold": threshold, + "avg_load": round(avg_load, 3), + "intervention_needed": intervention_needed, + "gathered_at": gather_time +} +print(json.dumps(report, indent=2))