2026-04-07 02:02:50 +00:00
4 changed files with 199 additions and 0 deletions
--- a/.gitea/workflows/notebook-ci.yml
+++ b/.gitea/workflows/notebook-ci.yml
@@ -0,0 +1,44 @@
+name: Notebook CI
+
+on:  # run only when something under notebooks/ changes
+  push:
+    paths:
+      - 'notebooks/**'
+  pull_request:
+    paths:
+      - 'notebooks/**'
+
+jobs:
+  notebook-smoke:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install dependencies  # ipykernel is listed explicitly: the kernel registration below imports it
+        run: |
+          pip install papermill jupytext nbformat ipykernel
+          python -m ipykernel install --user --name python3
+
+      - name: Execute system health notebook  # -p overrides the defaults in the notebook's "parameters" cell
+        run: |
+          papermill notebooks/agent_task_system_health.ipynb /tmp/output.ipynb \
+            -p threshold 0.5 \
+            -p hostname ci-runner
+
+      - name: Verify output has results  # fail if no code cell of the executed notebook recorded an output
+        run: |
+          python -c "
+          import json
+          nb = json.load(open('/tmp/output.ipynb'))
+          code_cells = [c for c in nb['cells'] if c['cell_type'] == 'code']
+          outputs = [c.get('outputs', []) for c in code_cells]
+          total_outputs = sum(len(o) for o in outputs)
+          assert total_outputs > 0, 'Notebook produced no outputs'
+          print(f'Notebook executed successfully with {total_outputs} output(s)')
+          "
--- a/docs/NOTEBOOK_WORKFLOW.md
+++ b/docs/NOTEBOOK_WORKFLOW.md
@@ -0,0 +1,57 @@
+# Notebook Workflow for Agent Tasks
+
+This document describes a sovereign, version-controlled workflow for LLM agent tasks using Jupyter notebooks.
+
+## Philosophy
+
+- **`.py` files are the source of truth** — authored and reviewed as plain Python with `# %%` cell markers (via Jupytext)
+- **`.ipynb` files are generated artifacts** — auto-created from `.py` for execution and rich viewing
+- **Papermill parameterizes and executes** — each run produces an output notebook with code, narrative, and results preserved
+- **Output notebooks are audit artifacts** — every execution leaves a permanent, replayable record
+
+## File Layout
+
+```
+notebooks/
+  agent_task_system_health.py      # Source of truth (Jupytext)
+  agent_task_system_health.ipynb   # Generated from .py
+docs/
+  NOTEBOOK_WORKFLOW.md             # This document
+.gitea/workflows/
+  notebook-ci.yml                  # CI gate: executes notebooks on PR/push
+```
+
+## How Agents Work With Notebooks
+
+1. **Create** — Agent generates a `.py` notebook using `# %% [markdown]` and `# %%` code blocks
+2. **Review** — PR reviewers see clean diffs in Gitea (no JSON noise)
+3. **Generate** — `jupytext --to ipynb` produces the `.ipynb` before merge
+4. **Execute** — Papermill runs the notebook with injected parameters
+5. **Archive** — Output notebook is committed to a `reports/` branch or artifact store
+
+## Converting Between Formats
+
+```bash
+# .py -> .ipynb
+jupytext --to ipynb notebooks/agent_task_system_health.py
+
+# .ipynb -> .py
+jupytext --to py notebooks/agent_task_system_health.ipynb
+
+# Execute with parameters
+papermill notebooks/agent_task_system_health.ipynb output.ipynb \
+  -p threshold 1.0 -p hostname forge-vps-01
+```
+
+## CI Gate
+
+The `notebook-ci.yml` workflow executes `notebooks/agent_task_system_health.ipynb` with Papermill whenever a push or pull request touches `notebooks/**`, ensuring that the checked-in notebook still runs and produces outputs.
+
+## Why This Matters
+
+| Problem | Notebook Solution |
+|---|---|
+| Ephemeral agent reasoning | Markdown cells narrate the thought process |
+| Stateless single-turn tools | Stateful cells persist variables across steps |
+| Unreviewable binary artifacts | `.py` source is diffable and PR-friendly |
+| No execution audit trail | Output notebook preserves code + outputs + metadata |
--- a/notebooks/agent_task_system_health.ipynb
+++ b/notebooks/agent_task_system_health.ipynb
@@ -0,0 +1,57 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# Parameterized Agent Task: System Health Check\n",
+        "\n",
+        "This notebook demonstrates how an LLM agent can generate a task notebook,\n",
+        "a scheduler can parameterize and execute it via papermill,\n",
+        "and the output becomes a persistent audit artifact."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {"tags": ["parameters"]},
+      "outputs": [],
+      "source": [
+        "# Default parameters — papermill will inject overrides here\n",
+        "threshold = 1.0\n",
+        "hostname = \"localhost\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import json, os, datetime\n",
+        "gather_time = datetime.datetime.now().isoformat()\n",
+        "# os.getloadavg() yields the 1-, 5- and 15-minute load averages without spawning `cat`\n",
+        "load_values = list(os.getloadavg())\n",
+        "avg_load = sum(load_values) / len(load_values)\n",
+        "intervention_needed = avg_load > threshold\n",
+        "report = {\n",
+        "    \"hostname\": hostname,\n",
+        "    \"threshold\": threshold,\n",
+        "    \"avg_load\": round(avg_load, 3),\n",
+        "    \"intervention_needed\": intervention_needed,\n",
+        "    \"gathered_at\": gather_time\n",
+        "}\n",
+        "print(json.dumps(report, indent=2))"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+}
--- a/notebooks/agent_task_system_health.py
+++ b/notebooks/agent_task_system_health.py
@@ -0,0 +1,41 @@
+# ---
+# jupyter:
+#   jupytext:
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#       jupytext_version: 1.19.1
+#   kernelspec:
+#     display_name: Python 3
+#     language: python
+#     name: python3
+# ---
+
+# %% [markdown]
+# # Parameterized Agent Task: System Health Check
+#
+# This notebook demonstrates how an LLM agent can generate a task notebook,
+# a scheduler can parameterize and execute it via papermill,
+# and the output becomes a persistent audit artifact.
+
+# %% tags=["parameters"]
+# Default parameters — papermill will inject overrides here
+threshold = 1.0
+hostname = "localhost"
+
+# %%
+import json, os, datetime
+gather_time = datetime.datetime.now().isoformat()
+# os.getloadavg() yields the 1-, 5- and 15-minute load averages without spawning `cat`
+load_values = list(os.getloadavg())
+avg_load = sum(load_values) / len(load_values)
+intervention_needed = avg_load > threshold
+report = {
+    "hostname": hostname,
+    "threshold": threshold,
+    "avg_load": round(avg_load, 3),
+    "intervention_needed": intervention_needed,
+    "gathered_at": gather_time
+}
+print(json.dumps(report, indent=2))