Compare commits
21 Commits
burn/20260
...
fix/ci-val
| Author | SHA1 | Date | |
|---|---|---|---|
| e8f2ecd2ea | |||
| 941cb25cbe | |||
| 05e9c1bf51 | |||
| 186d5f8056 | |||
| 86914554f1 | |||
| a4665679ab | |||
| 6f3ed4c963 | |||
| b84b97fb6f | |||
|
|
a65f736f54 | ||
| 8bf41c00e4 | |||
| 41046d4bf1 | |||
| 52d60198fc | |||
| ae7915fc20 | |||
|
|
d64b2e7561 | ||
| b172d23b98 | |||
| 71bf82d9fb | |||
| 5a649966ab | |||
|
|
179833148f | ||
| 41044d36ae | |||
| a9aed5a545 | |||
| c5e6494326 |
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
<!-- What changed and why. One paragraph max. -->
|
<!-- What changed and why. One paragraph max. -->
|
||||||
|
|
||||||
## Linked Issue
|
## Governing Issue
|
||||||
|
|
||||||
<!-- REQUIRED. Every PR must reference at least one issue. Max 3 issues per PR. -->
|
<!-- REQUIRED. Every PR must reference at least one issue. Max 3 issues per PR. -->
|
||||||
<!-- Closes #ISSUENUM -->
|
<!-- Closes #ISSUENUM -->
|
||||||
@@ -10,18 +10,20 @@
|
|||||||
|
|
||||||
## Acceptance Criteria
|
## Acceptance Criteria
|
||||||
|
|
||||||
<!-- What specific outcomes does this PR deliver? Check each when proven. -->
|
<!-- List the specific outcomes this PR delivers. Check each only when proven. -->
|
||||||
|
<!-- Copy these from the governing issue if it has them. -->
|
||||||
|
|
||||||
- [ ] Criterion 1
|
- [ ] Criterion 1
|
||||||
- [ ] Criterion 2
|
- [ ] Criterion 2
|
||||||
|
|
||||||
## Proof
|
## Proof
|
||||||
|
|
||||||
### What was tested
|
|
||||||
|
|
||||||
<!-- Paste the exact commands, output, log paths, or world-state artifacts that prove the acceptance criteria were met. -->
|
|
||||||
<!-- No proof = no merge. See CONTRIBUTING.md for the full standard. -->
|
<!-- No proof = no merge. See CONTRIBUTING.md for the full standard. -->
|
||||||
|
|
||||||
|
### Commands / logs / world-state proof
|
||||||
|
|
||||||
|
<!-- Paste the exact commands, output, log paths, or world-state artifacts that prove each acceptance criterion was met. -->
|
||||||
|
|
||||||
```
|
```
|
||||||
$ <command you ran>
|
$ <command you ran>
|
||||||
<relevant output>
|
<relevant output>
|
||||||
@@ -42,8 +44,11 @@ $ <command you ran>
|
|||||||
|
|
||||||
## Checklist
|
## Checklist
|
||||||
|
|
||||||
- [ ] Proof meets CONTRIBUTING.md standard (exact commands, output, or artifacts)
|
<!-- Complete every item before requesting review. -->
|
||||||
- [ ] Python files pass syntax check (`python -c "import ast; ast.parse(open('file.py').read())"`)
|
|
||||||
- [ ] Shell scripts are executable (`chmod +x`)
|
- [ ] PR body references at least one issue number (`Closes #N` or `Refs #N`)
|
||||||
|
- [ ] Changed files are syntactically valid (`python -c "import ast; ast.parse(open(f).read())"`, `node --check`, `bash -n`)
|
||||||
|
- [ ] Proof meets CONTRIBUTING.md standard (exact commands, output, or artifacts — not "looks right")
|
||||||
- [ ] Branch is up-to-date with base
|
- [ ] Branch is up-to-date with base
|
||||||
- [ ] No more than 3 unrelated issues bundled in this PR
|
- [ ] No more than 3 unrelated issues bundled in this PR
|
||||||
|
- [ ] Shell scripts are executable (`chmod +x`)
|
||||||
|
|||||||
@@ -112,23 +112,10 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
- uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: '3.11'
|
||||||
|
- name: Install PyYAML
|
||||||
|
run: pip install pyyaml
|
||||||
- name: Validate playbook structure
|
- name: Validate playbook structure
|
||||||
run: |
|
run: python3 scripts/validate_playbook_schema.py
|
||||||
python3 -c "
|
|
||||||
import yaml, sys, glob
|
|
||||||
required_keys = {'name', 'description'}
|
|
||||||
for f in glob.glob('playbooks/*.yaml'):
|
|
||||||
with open(f) as fh:
|
|
||||||
try:
|
|
||||||
data = yaml.safe_load(fh)
|
|
||||||
if not isinstance(data, dict):
|
|
||||||
print(f'ERROR: {f} is not a YAML mapping')
|
|
||||||
sys.exit(1)
|
|
||||||
missing = required_keys - set(data.keys())
|
|
||||||
if missing:
|
|
||||||
print(f'WARNING: {f} missing keys: {missing}')
|
|
||||||
print(f'OK: {f}')
|
|
||||||
except yaml.YAMLError as e:
|
|
||||||
print(f'ERROR: {f}: {e}')
|
|
||||||
sys.exit(1)
|
|
||||||
"
|
|
||||||
|
|||||||
24
.gitignore
vendored
24
.gitignore
vendored
@@ -10,3 +10,27 @@ __pycache__/
|
|||||||
|
|
||||||
# Generated audit reports
|
# Generated audit reports
|
||||||
reports/
|
reports/
|
||||||
|
|
||||||
|
# Secrets and credentials
|
||||||
|
.bash_history
|
||||||
|
.git-credentials
|
||||||
|
.gitea_token
|
||||||
|
.ssh/id_*
|
||||||
|
.ssh/known_hosts
|
||||||
|
.viminfo
|
||||||
|
.wget-hsts
|
||||||
|
.profile
|
||||||
|
.bashrc
|
||||||
|
.bash_logout
|
||||||
|
.python_history
|
||||||
|
.lesshst
|
||||||
|
.selected_editor
|
||||||
|
.sudo_as_admin_successful
|
||||||
|
.config/telegram/
|
||||||
|
.hermes/.env
|
||||||
|
.hermes/auth.json
|
||||||
|
*.pem
|
||||||
|
*.key
|
||||||
|
.env
|
||||||
|
.env.*
|
||||||
|
!.env.example
|
||||||
|
|||||||
212
cron/jobs-backup-2026-04-10.json
Normal file
212
cron/jobs-backup-2026-04-10.json
Normal file
@@ -0,0 +1,212 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"job_id": "9e0624269ba7",
|
||||||
|
"name": "Triage Heartbeat",
|
||||||
|
"schedule": "every 15m",
|
||||||
|
"state": "paused"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "e29eda4a8548",
|
||||||
|
"name": "PR Review Sweep",
|
||||||
|
"schedule": "every 30m",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "a77a87392582",
|
||||||
|
"name": "Health Monitor",
|
||||||
|
"schedule": "every 5m",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "5e9d952871bc",
|
||||||
|
"name": "Agent Status Check",
|
||||||
|
"schedule": "every 10m",
|
||||||
|
"state": "paused"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "36fb2f630a17",
|
||||||
|
"name": "Hermes Philosophy Loop",
|
||||||
|
"schedule": "every 1440m",
|
||||||
|
"state": "paused"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "b40a96a2f48c",
|
||||||
|
"name": "wolf-eval-cycle",
|
||||||
|
"schedule": "every 240m",
|
||||||
|
"state": "paused"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "4204e568b862",
|
||||||
|
"name": "Burn Mode \u2014 Timmy Orchestrator",
|
||||||
|
"schedule": "every 15m",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "0944a976d034",
|
||||||
|
"name": "Burn Mode",
|
||||||
|
"schedule": "every 15m",
|
||||||
|
"state": "paused"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "62016b960fa0",
|
||||||
|
"name": "velocity-engine",
|
||||||
|
"schedule": "every 30m",
|
||||||
|
"state": "paused"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "e9d49eeff79c",
|
||||||
|
"name": "weekly-skill-extraction",
|
||||||
|
"schedule": "every 10080m",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "75c74a5bb563",
|
||||||
|
"name": "tower-tick",
|
||||||
|
"schedule": "every 1m",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "390a19054d4c",
|
||||||
|
"name": "Burn Deadman",
|
||||||
|
"schedule": "every 30m",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "05e3c13498fa",
|
||||||
|
"name": "Morning Report \u2014 Burn Mode",
|
||||||
|
"schedule": "0 6 * * *",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "64fe44b512b9",
|
||||||
|
"name": "evennia-morning-report",
|
||||||
|
"schedule": "0 9 * * *",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "3896a7fd9747",
|
||||||
|
"name": "Gitea Priority Inbox",
|
||||||
|
"schedule": "every 3m",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "f64c2709270a",
|
||||||
|
"name": "Config Drift Guard",
|
||||||
|
"schedule": "every 30m",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "fc6a75b7102a",
|
||||||
|
"name": "Gitea Event Watcher",
|
||||||
|
"schedule": "every 2m",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "12e59648fb06",
|
||||||
|
"name": "Burndown Night Watcher",
|
||||||
|
"schedule": "every 15m",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "35d3ada9cf8f",
|
||||||
|
"name": "Mempalace Forge \u2014 Issue Analysis",
|
||||||
|
"schedule": "every 60m",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "190b6fb8dc91",
|
||||||
|
"name": "Mempalace Watchtower \u2014 Fleet Health",
|
||||||
|
"schedule": "every 30m",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "710ab589813c",
|
||||||
|
"name": "Ezra Health Monitor",
|
||||||
|
"schedule": "every 15m",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "a0a9cce4575c",
|
||||||
|
"name": "daily-poka-yoke-ultraplan-awesometools",
|
||||||
|
"schedule": "every 1440m",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "adc3a51457bd",
|
||||||
|
"name": "vps-agent-dispatch",
|
||||||
|
"schedule": "every 10m",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "afd2c4eac44d",
|
||||||
|
"name": "Project Mnemosyne Nightly Burn v2",
|
||||||
|
"schedule": "*/30 * * * *",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "f3a3c2832af0",
|
||||||
|
"name": "gemma4-multimodal-worker",
|
||||||
|
"schedule": "once in 15m",
|
||||||
|
"state": "completed"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "c17a85c19838",
|
||||||
|
"name": "know-thy-father-analyzer",
|
||||||
|
"schedule": "0 * * * *",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "2490fc01a14d",
|
||||||
|
"name": "Testament Burn - 10min work loop",
|
||||||
|
"schedule": "*/10 * * * *",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "f5e858159d97",
|
||||||
|
"name": "Timmy Foundation Burn \u2014 15min PR loop",
|
||||||
|
"schedule": "*/15 * * * *",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "5e262fb9bdce",
|
||||||
|
"name": "nightwatch-health-monitor",
|
||||||
|
"schedule": "*/15 * * * *",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "f2b33a9dcf96",
|
||||||
|
"name": "nightwatch-mempalace-mine",
|
||||||
|
"schedule": "0 */2 * * *",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "82cb9e76c54d",
|
||||||
|
"name": "nightwatch-backlog-burn",
|
||||||
|
"schedule": "0 */4 * * *",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "d20e42a52863",
|
||||||
|
"name": "beacon-sprint",
|
||||||
|
"schedule": "*/15 * * * *",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "579269489961",
|
||||||
|
"name": "testament-story",
|
||||||
|
"schedule": "*/15 * * * *",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "2e5f9140d1ab",
|
||||||
|
"name": "nightwatch-research",
|
||||||
|
"schedule": "0 */2 * * *",
|
||||||
|
"state": "scheduled"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"job_id": "aeba92fd65e6",
|
||||||
|
"name": "timmy-dreams",
|
||||||
|
"schedule": "30 5 * * *",
|
||||||
|
"state": "scheduled"
|
||||||
|
}
|
||||||
|
]
|
||||||
14
cron/vps/allegro-crontab-backup.txt
Normal file
14
cron/vps/allegro-crontab-backup.txt
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
0 6 * * * /bin/bash /root/wizards/scripts/model_download_guard.sh >> /var/log/model_guard.log 2>&1
|
||||||
|
|
||||||
|
# Allegro Hybrid Heartbeat — quick wins every 15 min
|
||||||
|
*/15 * * * * /usr/bin/python3 /root/allegro/heartbeat_daemon.py >> /var/log/allegro_heartbeat.log 2>&1
|
||||||
|
|
||||||
|
# Allegro Burn Mode Cron Jobs - Deployed via issue #894
|
||||||
|
|
||||||
|
0 6 * * * cd /root/.hermes && python3 -c "import hermes_agent; from hermes_tools import terminal; output = terminal('echo \"Morning Report: $(date)\"'); print(output.get('output', ''))" >> /root/.hermes/logs/morning-report-$(date +\%Y\%m\%d).log 2>&1 # Allegro Morning Report at 0600
|
||||||
|
|
||||||
|
0,30 * * * * cd /root/.hermes && python3 /root/.hermes/retry_wrapper.py "python3 allegro/quick-lane-check.py" >> burn-logs/quick-lane-$(date +\%Y\%m\%d).log 2>&1 # Allegro Burn Loop #1 (with retry)
|
||||||
|
15,45 * * * * cd /root/.hermes && python3 /root/.hermes/retry_wrapper.py "python3 allegro/burn-mode-validator.py" >> burn-logs/validator-$(date +\%Y\%m\%d).log 2>&1 # Allegro Burn Loop #2 (with retry)
|
||||||
|
|
||||||
|
*/2 * * * * /root/wizards/bezalel/dead_man_monitor.sh
|
||||||
|
*/2 * * * * /root/wizards/allegro/bin/config-deadman.sh
|
||||||
10
cron/vps/bezalel-crontab-backup.txt
Normal file
10
cron/vps/bezalel-crontab-backup.txt
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
0 2 * * * /root/wizards/bezalel/run_nightly_watch.sh
|
||||||
|
0 3 * * * /root/wizards/bezalel/mempalace_nightly.sh
|
||||||
|
*/10 * * * * pgrep -f "act_runner daemon" > /dev/null || (cd /opt/gitea-runner && nohup ./act_runner daemon > /var/log/gitea-runner.log 2>&1 &)
|
||||||
|
30 3 * * * /root/wizards/bezalel/backup_databases.sh
|
||||||
|
*/15 * * * * /root/wizards/bezalel/meta_heartbeat.sh
|
||||||
|
0 4 * * * /root/wizards/bezalel/secret_guard.sh
|
||||||
|
0 4 * * * /usr/bin/env bash /root/timmy-home/scripts/backup_pipeline.sh >> /var/log/timmy/backup_pipeline_cron.log 2>&1
|
||||||
|
0 6 * * * /usr/bin/python3 /root/wizards/bezalel/ultraplan.py >> /var/log/bezalel-ultraplan.log 2>&1
|
||||||
|
@reboot /root/wizards/bezalel/emacs-daemon-start.sh
|
||||||
|
@reboot /root/wizards/bezalel/ngircd-start.sh
|
||||||
13
cron/vps/ezra-crontab-backup.txt
Normal file
13
cron/vps/ezra-crontab-backup.txt
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# Burn Mode Cycles — 15 min autonomous loops
|
||||||
|
*/15 * * * * /root/wizards/ezra/bin/burn-mode.sh >> /root/wizards/ezra/reports/burn-cron.log 2>&1
|
||||||
|
|
||||||
|
# Household Snapshots — automated heartbeats and snapshots
|
||||||
|
# Ezra Self-Improvement Automation Suite
|
||||||
|
*/5 * * * * /usr/bin/python3 /root/wizards/ezra/tools/gitea_monitor.py >> /root/wizards/ezra/reports/gitea-monitor.log 2>&1
|
||||||
|
*/5 * * * * /usr/bin/python3 /root/wizards/ezra/tools/awareness_loop.py >> /root/wizards/ezra/reports/awareness-loop.log 2>&1
|
||||||
|
*/10 * * * * /usr/bin/python3 /root/wizards/ezra/tools/cron_health_monitor.py >> /root/wizards/ezra/reports/cron-health.log 2>&1
|
||||||
|
0 6 * * * /usr/bin/python3 /root/wizards/ezra/tools/morning_kt_compiler.py >> /root/wizards/ezra/reports/morning-kt.log 2>&1
|
||||||
|
5 6 * * * /usr/bin/python3 /root/wizards/ezra/tools/burndown_generator.py >> /root/wizards/ezra/reports/burndown.log 2>&1
|
||||||
|
0 3 * * * /root/wizards/ezra/mempalace_nightly.sh >> /var/log/ezra_mempalace_cron.log 2>&1
|
||||||
|
*/15 * * * * GITEA_TOKEN=6de6aa...1117 /root/wizards/ezra/dispatch-direct.sh >> /root/wizards/ezra/dispatch-cron.log 2>&1
|
||||||
|
|
||||||
110
docs/FLEET_BEHAVIOUR_HARDENING.md
Normal file
110
docs/FLEET_BEHAVIOUR_HARDENING.md
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
# Fleet Behaviour Hardening — Review & Action Plan
|
||||||
|
|
||||||
|
**Author:** @perplexity
|
||||||
|
**Date:** 2026-04-08
|
||||||
|
**Context:** Alexander asked: "Is it the memory system or the behaviour guardrails?"
|
||||||
|
**Answer:** It's the guardrails. The memory system is adequate. The enforcement machinery is aspirational.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Diagnosis: Why the Fleet Isn't Smart Enough
|
||||||
|
|
||||||
|
After auditing SOUL.md, config.yaml, all 8 playbooks, the orchestrator, the guard scripts, and the v7.0.0 checkin, the pattern is clear:
|
||||||
|
|
||||||
|
**The fleet has excellent design documents and broken enforcement.**
|
||||||
|
|
||||||
|
| Layer | Design Quality | Enforcement Quality | Gap |
|
||||||
|
|---|---|---|---|
|
||||||
|
| SOUL.md | Excellent | None — no code reads it at runtime | Philosophy without machinery |
|
||||||
|
| Playbooks (7 yaml) | Good lane map | Not invoked by orchestrator | Playbooks exist but nobody calls them |
|
||||||
|
| Guard scripts (9) | Solid code | 1 of 9 wired (#395 audit) | 89% of guards are dead code |
|
||||||
|
| Orchestrator | Sound design | Gateway dispatch is a no-op (#391) | Assigns issues but doesn't trigger work |
|
||||||
|
| Cycle Guard | Good 10-min rule | No cron/loop calls it | Discipline without enforcement |
|
||||||
|
| PR Reviewer | Clear rules | Runs every 30m (if scheduled) | Only guard that might actually fire |
|
||||||
|
| Memory (MemPalace) | Working code | Retrieval enforcer wired | Actually operational |
|
||||||
|
|
||||||
|
### The Core Problem
|
||||||
|
|
||||||
|
Agents pick up issues and produce output, but there is **no pre-task checklist** and **no post-task quality gate**. An agent can:
|
||||||
|
|
||||||
|
1. Start work without checking if someone else already did it
|
||||||
|
2. Produce output without running tests
|
||||||
|
3. Submit a PR without verifying it addresses the issue
|
||||||
|
4. Work for hours on something out of scope
|
||||||
|
5. Create duplicate branches/PRs without detection
|
||||||
|
|
||||||
|
The SOUL.md says "grounding before generation" but no code enforces it.
|
||||||
|
The playbooks define lanes but the orchestrator doesn't load them.
|
||||||
|
The guards exist but nothing calls them.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What the Fleet Needs (Priority Order)
|
||||||
|
|
||||||
|
### 1. Pre-Task Gate (MISSING — this PR adds it)
|
||||||
|
|
||||||
|
Before an agent starts any issue:
|
||||||
|
- [ ] Check if issue is already assigned to another agent
|
||||||
|
- [ ] Check if a branch already exists for this issue
|
||||||
|
- [ ] Check if a PR already exists for this issue
|
||||||
|
- [ ] Load relevant MemPalace context (retrieval enforcer)
|
||||||
|
- [ ] Verify the agent has the right lane for this work (playbook check)
|
||||||
|
|
||||||
|
### 2. Post-Task Gate (MISSING — this PR adds it)
|
||||||
|
|
||||||
|
Before an agent submits a PR:
|
||||||
|
- [ ] Verify the diff addresses the issue title/body
|
||||||
|
- [ ] Run syntax_guard.py on changed files
|
||||||
|
- [ ] Check for duplicate PRs targeting the same issue
|
||||||
|
- [ ] Verify branch name follows convention
|
||||||
|
- [ ] Run tests if they exist for changed files
|
||||||
|
|
||||||
|
### 3. Wire the Existing Guards (8 of 9 are dead code)
|
||||||
|
|
||||||
|
Per #395 audit:
|
||||||
|
- Pre-commit hooks: need symlink on every machine
|
||||||
|
- Cycle guard: need cron/loop integration
|
||||||
|
- Forge health check: need cron entry
|
||||||
|
- Smoke test + deploy validate: need deploy script integration
|
||||||
|
|
||||||
|
### 4. Orchestrator Dispatch Actually Works
|
||||||
|
|
||||||
|
Per #391 audit: the orchestrator scores and assigns but the gateway dispatch just writes to `/tmp/hermes-dispatch.log`. Nobody reads that file. The dispatch needs to either:
|
||||||
|
- Trigger `hermes` CLI on the target machine, or
|
||||||
|
- Post a webhook that the agent loop picks up
|
||||||
|
|
||||||
|
### 5. Agent Self-Assessment Loop
|
||||||
|
|
||||||
|
After completing work, agents should answer:
|
||||||
|
- Did I address the issue as stated?
|
||||||
|
- Did I stay in scope?
|
||||||
|
- Did I check the palace for prior work?
|
||||||
|
- Did I run verification?
|
||||||
|
|
||||||
|
This is what SOUL.md calls "the apparatus that gives these words teeth."
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What's Working (Don't Touch)
|
||||||
|
|
||||||
|
- **MemPalace sovereign_store.py** — SQLite + FTS5 + HRR, operational
|
||||||
|
- **Retrieval enforcer** — wired to SovereignStore as of 14 hours ago
|
||||||
|
- **Wake-up protocol** — palace-first boot sequence
|
||||||
|
- **PR reviewer playbook** — clear rules, well-scoped
|
||||||
|
- **Issue triager playbook** — comprehensive lane map with 11 agents
|
||||||
|
- **Cycle guard code** — solid 10-min slice discipline (just needs wiring)
|
||||||
|
- **Config drift guard** — active cron, working
|
||||||
|
- **Dead man switch** — active, working
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Recommendation
|
||||||
|
|
||||||
|
The memory system is not the bottleneck. The behaviour guardrails are. Specifically:
|
||||||
|
|
||||||
|
1. **Add `task_gate.py`** — pre-task and post-task quality gates that every agent loop calls
|
||||||
|
2. **Wire cycle_guard.py** — add start/complete calls to agent loop
|
||||||
|
3. **Wire pre-commit hooks** — deploy script should symlink on provision
|
||||||
|
4. **Fix orchestrator dispatch** — make it actually trigger work, not just log
|
||||||
|
|
||||||
|
This PR adds item 1. Items 2-4 need SSH access and are flagged for Timmy/Allegro.
|
||||||
166
playbooks/fleet-guardrails.yaml
Normal file
166
playbooks/fleet-guardrails.yaml
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
# fleet-guardrails.yaml
|
||||||
|
# =====================
|
||||||
|
# Enforceable behaviour boundaries for every agent in the Timmy fleet.
|
||||||
|
# Consumed by task_gate.py (pre/post checks) and the orchestrator's
|
||||||
|
# dispatch loop. Every rule here is testable — no aspirational prose.
|
||||||
|
#
|
||||||
|
# Ref: SOUL.md "grounding before generation", Five Wisdoms #345
|
||||||
|
|
||||||
|
name: fleet-guardrails
|
||||||
|
version: "1.0.0"
|
||||||
|
description: >
|
||||||
|
Behaviour constraints that apply to ALL agents regardless of role.
|
||||||
|
These are the non-negotiable rules that task_gate.py enforces
|
||||||
|
before an agent may pick up work and after it claims completion.
|
||||||
|
|
||||||
|
# ─── UNIVERSAL CONSTRAINTS ───────────────────────────────────────
|
||||||
|
|
||||||
|
constraints:
|
||||||
|
|
||||||
|
# 1. Lane discipline — agents must stay in their lane
|
||||||
|
lane_enforcement:
|
||||||
|
enabled: true
|
||||||
|
source: playbooks/agent-lanes.json
|
||||||
|
on_violation: block_and_notify
|
||||||
|
description: >
|
||||||
|
An agent may only pick up issues tagged for its lane.
|
||||||
|
Cross-lane work requires explicit Timmy approval via
|
||||||
|
issue comment containing 'LANE_OVERRIDE: <agent>'.
|
||||||
|
|
||||||
|
# 2. Branch hygiene — no orphan branches
|
||||||
|
branch_hygiene:
|
||||||
|
enabled: true
|
||||||
|
max_branches_per_agent: 3
|
||||||
|
stale_branch_days: 7
|
||||||
|
naming_pattern: "{agent}/{issue_number}-{slug}"
|
||||||
|
on_violation: warn_then_block
|
||||||
|
description: >
|
||||||
|
Agents must follow branch naming conventions and clean up
|
||||||
|
after merge. No agent may have more than 3 active branches.
|
||||||
|
|
||||||
|
# 3. Issue ownership — no silent takeovers
|
||||||
|
issue_ownership:
|
||||||
|
enabled: true
|
||||||
|
require_assignment_before_work: true
|
||||||
|
max_concurrent_issues: 2
|
||||||
|
on_violation: block_and_notify
|
||||||
|
description: >
|
||||||
|
An agent must be assigned to an issue before creating a
|
||||||
|
branch or PR. No agent may work on more than 2 issues
|
||||||
|
simultaneously to prevent context-switching waste.
|
||||||
|
|
||||||
|
# 4. PR quality — minimum bar before review
|
||||||
|
pr_quality:
|
||||||
|
enabled: true
|
||||||
|
require_linked_issue: true
|
||||||
|
require_passing_ci: true
|
||||||
|
max_files_changed: 30
|
||||||
|
max_diff_lines: 2000
|
||||||
|
require_description: true
|
||||||
|
min_description_length: 50
|
||||||
|
on_violation: block_merge
|
||||||
|
description: >
|
||||||
|
Every PR must link an issue, pass CI, have a meaningful
|
||||||
|
description, and stay within scope. Giant PRs get rejected.
|
||||||
|
|
||||||
|
# 5. Grounding before generation — SOUL.md compliance
|
||||||
|
grounding:
|
||||||
|
enabled: true
|
||||||
|
require_issue_read_before_branch: true
|
||||||
|
require_existing_code_review: true
|
||||||
|
require_soul_md_check: true
|
||||||
|
soul_md_path: SOUL.md
|
||||||
|
on_violation: block_and_notify
|
||||||
|
description: >
|
||||||
|
Before writing any code, the agent must demonstrate it has
|
||||||
|
read the issue, reviewed relevant existing code, and checked
|
||||||
|
SOUL.md for applicable doctrine. No speculative generation.
|
||||||
|
|
||||||
|
# 6. Completion integrity — no phantom completions
|
||||||
|
completion_checks:
|
||||||
|
enabled: true
|
||||||
|
require_test_evidence: true
|
||||||
|
require_ci_green: true
|
||||||
|
require_diff_matches_issue: true
|
||||||
|
require_no_unrelated_changes: true
|
||||||
|
on_violation: revert_and_notify
|
||||||
|
description: >
|
||||||
|
Post-task gate verifies the work actually addresses the
|
||||||
|
issue. Agents cannot close issues without evidence.
|
||||||
|
Unrelated changes in a PR trigger automatic rejection.
|
||||||
|
|
||||||
|
# 7. Communication discipline — no noise
|
||||||
|
communication:
|
||||||
|
enabled: true
|
||||||
|
max_comments_per_issue: 10
|
||||||
|
require_structured_updates: true
|
||||||
|
update_format: "status | what_changed | what_blocked | next_step"
|
||||||
|
prohibit_empty_updates: true
|
||||||
|
on_violation: warn
|
||||||
|
description: >
|
||||||
|
Issue comments must be structured and substantive.
|
||||||
|
Status-only comments without content are rejected.
|
||||||
|
Agents should update, not narrate.
|
||||||
|
|
||||||
|
# 8. Resource awareness — no runaway costs
|
||||||
|
resource_limits:
|
||||||
|
enabled: true
|
||||||
|
max_api_calls_per_task: 100
|
||||||
|
max_llm_tokens_per_task: 500000
|
||||||
|
max_task_duration_minutes: 60
|
||||||
|
on_violation: kill_and_notify
|
||||||
|
description: >
|
||||||
|
Hard limits on compute per task. If an agent hits these
|
||||||
|
limits, the task is killed and flagged for human review.
|
||||||
|
Prevents infinite loops and runaway API spending.
|
||||||
|
|
||||||
|
# ─── ESCALATION POLICY ───────────────────────────────────────────
|
||||||
|
|
||||||
|
escalation:
|
||||||
|
channels:
|
||||||
|
- gitea_issue_comment
|
||||||
|
- discord_webhook
|
||||||
|
severity_levels:
|
||||||
|
warn:
|
||||||
|
action: post_comment
|
||||||
|
notify: agent_only
|
||||||
|
block:
|
||||||
|
action: prevent_action
|
||||||
|
notify: agent_and_orchestrator
|
||||||
|
block_and_notify:
|
||||||
|
action: prevent_action
|
||||||
|
notify: agent_orchestrator_and_timmy
|
||||||
|
kill_and_notify:
|
||||||
|
action: terminate_task
|
||||||
|
notify: all_including_alexander
|
||||||
|
revert_and_notify:
|
||||||
|
action: revert_changes
|
||||||
|
notify: agent_orchestrator_and_timmy
|
||||||
|
|
||||||
|
# ─── AUDIT TRAIL ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
audit:
|
||||||
|
enabled: true
|
||||||
|
log_path: logs/guardrail-violations.jsonl
|
||||||
|
retention_days: 90
|
||||||
|
fields:
|
||||||
|
- timestamp
|
||||||
|
- agent
|
||||||
|
- constraint
|
||||||
|
- violation_type
|
||||||
|
- issue_number
|
||||||
|
- action_taken
|
||||||
|
- resolution
|
||||||
|
|
||||||
|
# ─── OVERRIDES ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
overrides:
|
||||||
|
# Only Timmy or Alexander can override guardrails
|
||||||
|
authorized_overriders:
|
||||||
|
- Timmy
|
||||||
|
- Alexander
|
||||||
|
override_mechanism: >
|
||||||
|
Post a comment on the issue with the format:
|
||||||
|
GUARDRAIL_OVERRIDE: <constraint_name> REASON: <explanation>
|
||||||
|
override_expiry_hours: 24
|
||||||
|
require_post_override_review: true
|
||||||
@@ -4,6 +4,8 @@
|
|||||||
Part of the Gemini Sovereign Infrastructure Suite.
|
Part of the Gemini Sovereign Infrastructure Suite.
|
||||||
|
|
||||||
Auto-detects and fixes common failures across the fleet.
|
Auto-detects and fixes common failures across the fleet.
|
||||||
|
|
||||||
|
Safe-by-default: runs in dry-run mode unless --execute is given.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
@@ -11,6 +13,7 @@ import sys
|
|||||||
import subprocess
|
import subprocess
|
||||||
import argparse
|
import argparse
|
||||||
import requests
|
import requests
|
||||||
|
import datetime
|
||||||
|
|
||||||
# --- CONFIGURATION ---
|
# --- CONFIGURATION ---
|
||||||
FLEET = {
|
FLEET = {
|
||||||
@@ -21,50 +24,209 @@ FLEET = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
class SelfHealer:
|
class SelfHealer:
|
||||||
|
def __init__(self, dry_run=True, confirm_kill=False, yes=False):
|
||||||
|
self.dry_run = dry_run
|
||||||
|
self.confirm_kill = confirm_kill
|
||||||
|
self.yes = yes
|
||||||
|
|
||||||
def log(self, message: str):
|
def log(self, message: str):
|
||||||
print(f"[*] {message}")
|
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
print(f"[{timestamp}] {message}")
|
||||||
|
|
||||||
def run_remote(self, host: str, command: str):
|
def run_remote(self, host: str, command: str):
|
||||||
ip = FLEET[host]["ip"]
|
ip = FLEET[host]["ip"]
|
||||||
ssh_cmd = ["ssh", "-o", "StrictHostKeyChecking=no", f"root@{ip}", command]
|
ssh_cmd = ["ssh", "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=5", f"root@{ip}", command]
|
||||||
if host == "mac":
|
if host == "mac":
|
||||||
ssh_cmd = ["bash", "-c", command]
|
ssh_cmd = ["bash", "-c", command]
|
||||||
try:
|
try:
|
||||||
return subprocess.run(ssh_cmd, capture_output=True, text=True, timeout=10)
|
return subprocess.run(ssh_cmd, capture_output=True, text=True, timeout=15)
|
||||||
except:
|
except Exception as e:
|
||||||
|
self.log(f" [ERROR] Failed to run remote command on {host}: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def check_and_heal(self):
|
def confirm(self, prompt: str) -> bool:
|
||||||
for host in FLEET:
|
"""Ask for confirmation unless --yes flag is set."""
|
||||||
self.log(f"Auditing {host}...")
|
if self.yes:
|
||||||
|
return True
|
||||||
|
while True:
|
||||||
|
response = input(f"{prompt} [y/N] ").strip().lower()
|
||||||
|
if response in ("y", "yes"):
|
||||||
|
return True
|
||||||
|
elif response in ("n", "no", ""):
|
||||||
|
return False
|
||||||
|
print("Please answer 'y' or 'n'.")
|
||||||
|
|
||||||
# 1. Check llama-server
|
def check_llama_server(self, host: str):
|
||||||
ip = FLEET[host]["ip"]
|
ip = FLEET[host]["ip"]
|
||||||
port = FLEET[host]["port"]
|
port = FLEET[host]["port"]
|
||||||
try:
|
try:
|
||||||
requests.get(f"http://{ip}:{port}/health", timeout=2)
|
requests.get(f"http://{ip}:{port}/health", timeout=2)
|
||||||
except:
|
except:
|
||||||
self.log(f" [!] llama-server down on {host}. Attempting restart...")
|
self.log(f" [!] llama-server down on {host}.")
|
||||||
|
if self.dry_run:
|
||||||
|
self.log(f" [DRY-RUN] Would restart llama-server on {host}")
|
||||||
|
else:
|
||||||
|
if self.confirm(f" Restart llama-server on {host}?"):
|
||||||
|
self.log(f" Restarting llama-server on {host}...")
|
||||||
self.run_remote(host, "systemctl restart llama-server")
|
self.run_remote(host, "systemctl restart llama-server")
|
||||||
|
else:
|
||||||
|
self.log(f" Skipped restart on {host}.")
|
||||||
|
|
||||||
# 2. Check disk space
|
def check_disk_space(self, host: str):
|
||||||
res = self.run_remote(host, "df -h / | tail -1 | awk '{print $5}' | sed 's/%//'")
|
res = self.run_remote(host, "df -h / | tail -1 | awk '{print $5}' | sed 's/%//'")
|
||||||
if res and res.returncode == 0:
|
if res and res.returncode == 0:
|
||||||
try:
|
try:
|
||||||
usage = int(res.stdout.strip())
|
usage = int(res.stdout.strip())
|
||||||
if usage > 90:
|
if usage > 90:
|
||||||
self.log(f" [!] Disk usage high on {host} ({usage}%). Cleaning logs...")
|
self.log(f" [!] Disk usage high on {host} ({usage}%).")
|
||||||
|
if self.dry_run:
|
||||||
|
self.log(f" [DRY-RUN] Would clean logs and vacuum journal on {host}")
|
||||||
|
else:
|
||||||
|
if self.confirm(f" Clean logs on {host}?"):
|
||||||
|
self.log(f" Cleaning logs on {host}...")
|
||||||
self.run_remote(host, "journalctl --vacuum-time=1d && rm -rf /var/log/*.gz")
|
self.run_remote(host, "journalctl --vacuum-time=1d && rm -rf /var/log/*.gz")
|
||||||
|
else:
|
||||||
|
self.log(f" Skipped log cleaning on {host}.")
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def check_memory(self, host: str):
    """Report high memory usage on *host* (informational only, no action taken)."""
    res = self.run_remote(host, "free -m | awk '/^Mem:/{print $3/$2 * 100}'")
    if res and res.returncode == 0:
        try:
            usage = float(res.stdout.strip())
        except ValueError:
            # Unparseable `free` output — skip. Replaces a bare `except: pass`
            # that would also have swallowed bugs in the reporting code.
            return
        if usage > 90:
            self.log(f"    [!] Memory usage high on {host} ({usage:.1f}%).")
            if self.dry_run:
                self.log(f"    [DRY-RUN] Would check for memory hogs on {host}")
            else:
                # No automatic remediation is defined for memory pressure.
                self.log("    Memory high but no automatic action defined.")
|
||||||
|
|
||||||
|
def check_processes(self, host: str):
    """List processes using >80% CPU on *host* and optionally offer to kill them.

    Killing requires --confirm-kill AND --execute AND an interactive
    confirmation; the actual kill is still a placeholder.
    """
    # Example: check if any process uses > 80% CPU
    res = self.run_remote(host, "ps aux --sort=-%cpu | awk 'NR>1 && $3>80 {print $2, $11, $3}'")
    if res and res.returncode == 0 and res.stdout.strip():
        self.log(f"    [!] High CPU processes on {host}:")
        for line in res.stdout.strip().split('\n'):
            self.log(f"        {line}")
        if self.dry_run:
            self.log(f"    [DRY-RUN] Would review high-CPU processes on {host}")
        elif self.confirm_kill:
            if self.confirm(f"    Kill high-CPU processes on {host}? (dangerous)"):
                # This is a placeholder; real implementation would parse PIDs
                self.log("    Process killing not implemented yet (placeholder).")
            else:
                self.log(f"    Skipped killing processes on {host}.")
        else:
            self.log("    Use --confirm-kill to enable process termination (dangerous).")
|
||||||
|
|
||||||
|
def check_and_heal(self):
    """Run every health check against every host in the fleet."""
    checks = (
        self.check_llama_server,
        self.check_disk_space,
        self.check_memory,
        self.check_processes,
    )
    for host in FLEET:
        self.log(f"Auditing {host}...")
        for check in checks:
            check(host)
|
||||||
|
|
||||||
def run(self):
    """Execute one full self-healing cycle, logging the active mode."""
    mode = "DRY-RUN" if self.dry_run else "EXECUTE"
    self.log(f"Starting self-healing cycle ({mode} mode).")
    self.check_and_heal()
    self.log("Cycle complete.")
|
||||||
|
|
||||||
|
def print_help_safe():
    """Print detailed explanation of what each action does."""
    # Print the literal directly — no intermediate variable needed.
    print("""
SAFE-BY-DEFAULT SELF-HEALING SCRIPT

This script checks fleet health and can optionally fix issues.

DEFAULT MODE: DRY-RUN (safe)
  - Only reports what it would do, does not make changes.
  - Use --execute to actually perform fixes.

CHECKS PERFORMED:
  1. llama-server health
     - Checks if llama-server is responding on each host.
     - Action: restart service (requires --execute and confirmation).

  2. Disk space
     - Checks root partition usage on each host.
     - Action: vacuum journal logs and remove rotated logs if >90% (requires --execute and confirmation).

  3. Memory usage
     - Reports high memory usage (informational only, no automatic action).

  4. Process health
     - Lists processes using >80% CPU.
     - Action: kill processes (requires --confirm-kill flag, --execute, and confirmation).

SAFETY FEATURES:
  - Dry-run by default.
  - Explicit --execute flag required for changes.
  - Confirmation prompts for all destructive actions.
  - --yes flag to skip confirmations (for automation).
  - --confirm-kill flag required to even consider killing processes.
  - Timestamps on all log messages.

EXAMPLES:
  python3 scripts/self_healing.py
      # Dry-run: safe, shows what would happen.

  python3 scripts/self_healing.py --execute
      # Actually perform fixes after confirmation.

  python3 scripts/self_healing.py --execute --yes
      # Perform fixes without prompts (automation).

  python3 scripts/self_healing.py --execute --confirm-kill
      # Allow killing processes (dangerous).

  python3 scripts/self_healing.py --help-safe
      # Show this help.
""")
|
||||||
|
|
||||||
def main():
|
def main():
    """Parse command-line flags and launch a SelfHealer cycle.

    Dry-run is the default; --execute enables real changes, and an
    explicit --dry-run always wins.
    """
    parser = argparse.ArgumentParser(
        description="Self-healing infrastructure script (safe-by-default).",
        add_help=False,  # --help is handled manually below
    )
    # Table-driven flag registration: (long name, aliases, help text).
    flag_specs = [
        ("--dry-run", [], "Run in dry-run mode (default behavior)."),
        ("--execute", [], "Actually perform fixes (disables dry-run)."),
        ("--confirm-kill", [], "Allow killing processes (dangerous)."),
        ("--yes", ["-y"], "Skip confirmation prompts."),
        ("--help-safe", [], "Show detailed help about safety features."),
        ("--help", ["-h"], "Show standard help."),
    ]
    for name, aliases, help_text in flag_specs:
        parser.add_argument(name, *aliases, action="store_true", default=False,
                            help=help_text)

    args = parser.parse_args()

    if args.help_safe:
        print_help_safe()
        sys.exit(0)

    if args.help:
        parser.print_help()
        sys.exit(0)

    # Dry-run unless --execute was given; an explicit --dry-run overrides.
    dry_run = (not args.execute) or args.dry_run

    healer = SelfHealer(dry_run=dry_run, confirm_kill=args.confirm_kill, yes=args.yes)
    healer.run()
|
healer.run()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
331
scripts/task_gate.py
Normal file
331
scripts/task_gate.py
Normal file
@@ -0,0 +1,331 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Task Gate — Pre-task and post-task quality gates for fleet agents.
|
||||||
|
|
||||||
|
This is the missing enforcement layer between the orchestrator dispatching
|
||||||
|
an issue and an agent submitting a PR. SOUL.md demands "grounding before
|
||||||
|
generation" and "the apparatus that gives these words teeth" — this script
|
||||||
|
is that apparatus.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python3 task_gate.py pre --repo timmy-config --issue 123 --agent groq
|
||||||
|
python3 task_gate.py post --repo timmy-config --issue 123 --agent groq --branch groq/issue-123
|
||||||
|
|
||||||
|
Pre-task gate checks:
|
||||||
|
1. Issue is not already assigned to a different agent
|
||||||
|
2. No existing branch targets this issue
|
||||||
|
3. No open PR already addresses this issue
|
||||||
|
4. Agent is in the correct lane per playbooks/agent-lanes.json
|
||||||
|
5. Issue is not filtered (epic, permanent, etc.)
|
||||||
|
|
||||||
|
Post-task gate checks:
|
||||||
|
1. Branch exists and has commits ahead of main
|
||||||
|
2. Changed files pass syntax_guard.py
|
||||||
|
3. No duplicate PR exists for the same issue
|
||||||
|
4. Branch name follows convention: {agent}/{description}
|
||||||
|
5. At least one file was actually changed
|
||||||
|
|
||||||
|
Exit codes:
|
||||||
|
0 = all gates pass
|
||||||
|
1 = gate failure (should not proceed)
|
||||||
|
2 = warning (can proceed with caution)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
import json
import os
import subprocess
import sys
import urllib.error
import urllib.parse
import urllib.request
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CONFIG
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Base URL of the Gitea REST API for the forge instance.
GITEA_API = "https://forge.alexanderwhitestone.com/api/v1"
# Organization that owns the repositories this gate operates on.
GITEA_OWNER = "Timmy_Foundation"

# Title tags marking issues that must never be auto-dispatched
# (matched against issue titles in pre_task_gate).
FILTER_TAGS = ["[EPIC]", "[DO NOT CLOSE]", "[PERMANENT]", "[PHILOSOPHY]", "[MORNING REPORT]"]

# Known agent account names, lowercase; used to detect when an issue is
# already assigned to a different agent.
AGENT_USERNAMES = {
    "groq", "ezra", "bezalel", "allegro", "timmy",
    "thetimmyc", "perplexity", "kimiclaw", "codex-agent",
    "manus", "claude", "gemini", "grok",
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# GITEA API
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
def load_gitea_token():
    """Return the Gitea API token from $GITEA_TOKEN or ~/.hermes fallback files.

    Exits the process with code 2 when no token can be found anywhere.
    """
    env_token = os.environ.get("GITEA_TOKEN", "")
    if env_token:
        return env_token.strip()
    candidates = (
        os.path.expanduser("~/.hermes/gitea_token_vps"),
        os.path.expanduser("~/.hermes/gitea_token"),
    )
    for candidate in candidates:
        try:
            with open(candidate) as fh:
                return fh.read().strip()
        except FileNotFoundError:
            continue
    print("[FATAL] No GITEA_TOKEN found")
    sys.exit(2)
|
||||||
|
|
||||||
|
|
||||||
|
def gitea_get(path):
    """GET a Gitea API *path* and return the decoded JSON payload.

    Returns None on a 404 (silently), and None with a printed
    "[API ERROR]" line for any other HTTP or network failure.
    """
    url = f"{GITEA_API}{path}"
    headers = {
        "Authorization": f"token {load_gitea_token()}",
        "Accept": "application/json",
    }
    request = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(request, timeout=15) as resp:
            return json.loads(resp.read().decode())
    except urllib.error.HTTPError as err:
        if err.code != 404:
            print(f"[API ERROR] {url} -> {err.code}")
        return None
    except Exception as err:
        print(f"[API ERROR] {url} -> {err}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# LANE CHECKER
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
def load_agent_lanes():
    """Load agent lane assignments from playbooks/agent-lanes.json.

    Returns an empty dict when the file is absent, which disables
    lane enforcement entirely.
    """
    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    lanes_path = os.path.join(repo_root, "playbooks", "agent-lanes.json")
    try:
        with open(lanes_path) as fh:
            return json.load(fh)
    except FileNotFoundError:
        return {}  # no lanes file = no lane enforcement
|
||||||
|
|
||||||
|
|
||||||
|
def check_agent_lane(agent, issue_title, issue_labels, lanes):
    """Check if the agent is in the right lane for this issue type.

    Advisory only: every branch currently passes (returns True) with an
    explanatory message. *issue_title* and *issue_labels* are accepted
    for future lane-matching logic but are not yet consulted.
    """
    if not lanes:
        return True, "No lane config found — skipping lane check"
    assigned = lanes.get(agent) or []
    if assigned:
        # Advisory, not blocking — a real mismatch would surface as a warning.
        return True, f"{agent} has lanes: {assigned}"
    return True, f"No lanes defined for {agent} — skipping"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# PRE-TASK GATE
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
def pre_task_gate(repo, issue_number, agent):
    """Run all pre-task checks. Returns (pass, messages).

    Verifies the issue exists, carries no filter tag, is not assigned to
    another agent, and has no competing branch or open PR. Lane checks are
    advisory. ``pass`` is True only when there are no hard failures; the
    returned messages are failures followed by warnings.
    """
    failures = []
    warnings = []

    print(f"\n=== PRE-TASK GATE: {repo}#{issue_number} for {agent} ===")

    # 1. Fetch issue
    issue = gitea_get(f"/repos/{GITEA_OWNER}/{repo}/issues/{issue_number}")
    if not issue:
        failures.append(f"Issue #{issue_number} not found in {repo}")
        return False, failures

    title = issue.get("title", "")
    print(f"  Issue: {title}")

    # 2. Check if filtered. Brackets are stripped so titles written without
    # them are still caught. NOTE(review): this is a plain substring test,
    # so e.g. "EPIC" would also match "EPICENTER" — confirm this is intended.
    title_upper = title.upper()
    for tag in FILTER_TAGS:
        if tag.upper().replace("[", "").replace("]", "") in title_upper:
            failures.append(f"Issue has filter tag: {tag} — should not be auto-dispatched")

    # 3. Check assignees — fail if another known agent already owns it.
    assignees = [a.get("login", "") for a in (issue.get("assignees") or [])]
    other_agents = [a for a in assignees if a.lower() in AGENT_USERNAMES and a.lower() != agent.lower()]
    if other_agents:
        failures.append(f"Already assigned to other agent(s): {other_agents}")

    # 4. Check for existing branches that mention this issue number.
    branches = gitea_get(f"/repos/{GITEA_OWNER}/{repo}/branches?limit=50")
    if branches:
        issue_branches = [
            b["name"] for b in branches
            if str(issue_number) in b.get("name", "")
            and b["name"] != "main"
        ]
        if issue_branches:
            warnings.append(f"Existing branches may target this issue: {issue_branches}")

    # 5. Check for existing PRs. BUG FIX: the PR body can be null in the
    # API payload; `p.get("body", "")` returned None and crashed the `in`
    # test with a TypeError, so coalesce explicitly.
    prs = gitea_get(f"/repos/{GITEA_OWNER}/{repo}/pulls?state=open&limit=50")
    if prs:
        issue_prs = [
            f"PR #{p['number']}: {p['title']}"
            for p in prs
            if str(issue_number) in (p.get("title") or "")
            or str(issue_number) in (p.get("body") or "")
        ]
        if issue_prs:
            failures.append(f"Open PR(s) already target this issue: {issue_prs}")

    # 6. Check agent lanes (advisory).
    lanes = load_agent_lanes()
    labels = [l.get("name", "") for l in (issue.get("labels") or [])]
    lane_ok, lane_msg = check_agent_lane(agent, title, labels, lanes)
    if not lane_ok:
        warnings.append(lane_msg)
    else:
        # BUG FIX: this message was previously appended to an unused
        # `messages` list and never shown anywhere; print it instead.
        print(f"  Lane: {lane_msg}")

    # Report
    if failures:
        print("\n  FAILURES:")
        for f in failures:
            print(f"    ❌ {f}")
    if warnings:
        print("\n  WARNINGS:")
        for w in warnings:
            print(f"    ⚠️  {w}")
    if not failures and not warnings:
        print("  \u2705 All pre-task gates passed")

    passed = len(failures) == 0
    return passed, failures + warnings
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# POST-TASK GATE
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
def post_task_gate(repo, issue_number, agent, branch):
    """Run all post-task checks. Returns (pass, messages).

    Confirms the branch exists, follows the {agent}/{description} naming
    convention, has commits ahead of main, has no duplicate open PRs, and
    passes syntax_guard.py when that script is available. ``pass`` is True
    only when there are no hard failures.
    """
    failures = []
    warnings = []

    print(f"\n=== POST-TASK GATE: {repo}#{issue_number} by {agent} ===")
    print(f"  Branch: {branch}")

    # 1. Check branch exists (quote the name: agent branches contain '/').
    branch_info = gitea_get(
        f"/repos/{GITEA_OWNER}/{repo}/branches/{urllib.parse.quote(branch, safe='')}"
    )
    if not branch_info:
        failures.append(f"Branch '{branch}' does not exist")
        return False, failures

    # 2. Check branch naming convention.
    if "/" not in branch:
        warnings.append(f"Branch name '{branch}' doesn't follow agent/description convention")
    elif not branch.startswith(f"{agent}/"):
        # BUG FIX: the message previously had an unterminated quote around
        # the agent prefix ("...agent name '{agent}/").
        warnings.append(f"Branch '{branch}' doesn't start with agent name '{agent}/'")

    # 3. Check for commits ahead of main.
    compare = gitea_get(
        f"/repos/{GITEA_OWNER}/{repo}/compare/main...{urllib.parse.quote(branch, safe='')}"
    )
    if compare:
        commits = compare.get("commits", [])
        if not commits:
            failures.append("Branch has no commits ahead of main")
        else:
            print(f"  Commits ahead: {len(commits)}")
            files = compare.get("diff_files", []) or []
            if files:
                print(f"  Files changed: {len(files)}")
            else:
                # The compare payload doesn't always include a file list.
                # (Removed an unused `num_files` local that read
                # "total_commits" here without doing anything with it.)
                print("  Files changed: (check PR diff)")

    # 4. Check for duplicate PRs. Coalesce body: the API can return null.
    prs = gitea_get(f"/repos/{GITEA_OWNER}/{repo}/pulls?state=open&limit=50")
    if prs:
        dupe_prs = [
            f"PR #{p['number']}"
            for p in prs
            if str(issue_number) in (p.get("title") or "")
            or str(issue_number) in (p.get("body") or "")
        ]
        if len(dupe_prs) > 1:
            warnings.append(f"Multiple open PRs may target issue #{issue_number}: {dupe_prs}")

    # 5. Run syntax guard on changed files (if available).
    syntax_guard = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        "hermes-sovereign", "scripts", "syntax_guard.py"
    )
    if os.path.exists(syntax_guard):
        try:
            result = subprocess.run(
                [sys.executable, syntax_guard],
                capture_output=True, text=True, timeout=30
            )
            if result.returncode != 0:
                failures.append(f"Syntax guard failed: {result.stdout[:200]}")
            else:
                print("  Syntax guard: passed")
        except Exception as e:
            warnings.append(f"Could not run syntax guard: {e}")
    else:
        warnings.append("syntax_guard.py not found — skipping syntax check")

    # Report
    if failures:
        print("\n  FAILURES:")
        for f in failures:
            print(f"    ❌ {f}")
    if warnings:
        print("\n  WARNINGS:")
        for w in warnings:
            print(f"    ⚠️  {w}")
    if not failures and not warnings:
        print("  \u2705 All post-task gates passed")

    passed = len(failures) == 0
    return passed, failures + warnings
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# MAIN
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
def main():
    """CLI entry point: dispatch to the pre- or post-task gate.

    Exits 0 when all gates pass, 1 on a gate failure or a missing/unknown
    subcommand.
    """
    parser = argparse.ArgumentParser(description="Task Gate — pre/post-task quality gates")
    subparsers = parser.add_subparsers(dest="command")

    # Pre-task
    pre = subparsers.add_parser("pre", help="Run pre-task gates")
    pre.add_argument("--repo", required=True)
    pre.add_argument("--issue", type=int, required=True)
    pre.add_argument("--agent", required=True)

    # Post-task
    post = subparsers.add_parser("post", help="Run post-task gates")
    post.add_argument("--repo", required=True)
    post.add_argument("--issue", type=int, required=True)
    post.add_argument("--agent", required=True)
    post.add_argument("--branch", required=True)

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        sys.exit(1)

    # The gate functions return (passed, messages); the messages were
    # already printed by the gates themselves, so only `passed` matters here.
    if args.command == "pre":
        passed, _msgs = pre_task_gate(args.repo, args.issue, args.agent)
    elif args.command == "post":
        passed, _msgs = post_task_gate(args.repo, args.issue, args.agent, args.branch)
    else:
        # Unreachable with the registered subparsers; kept as a safety net.
        parser.print_help()
        sys.exit(1)

    sys.exit(0 if passed else 1)


if __name__ == "__main__":
    main()
|
||||||
22
scripts/validate_playbook_schema.py
Normal file
22
scripts/validate_playbook_schema.py
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
#!/usr/bin/env python3
"""Validate playbook YAML files have required keys.

Exits 1 on the first unparseable or non-mapping file. Missing required
keys produce a WARNING but do not fail the run.
"""
import glob
import sys

import yaml  # third-party: PyYAML

REQUIRED_KEYS = {'name', 'description'}

for path in glob.glob('playbooks/*.yaml'):
    with open(path) as fh:
        # Keep the try body minimal: only the parse can raise YAMLError.
        try:
            data = yaml.safe_load(fh)
        except yaml.YAMLError as e:
            print(f'ERROR: {path}: {e}')
            sys.exit(1)
    if not isinstance(data, dict):
        print(f'ERROR: {path} is not a YAML mapping')
        sys.exit(1)
    missing = REQUIRED_KEYS - set(data.keys())
    if missing:
        print(f'WARNING: {path} missing keys: {missing}')
    else:
        # BUG FIX: "OK" was previously printed even when keys were missing,
        # so every file reported OK regardless of the warning above.
        print(f'OK: {path}')
|
||||||
Reference in New Issue
Block a user