diff --git a/docs/FLEET_BEHAVIOUR_HARDENING.md b/docs/FLEET_BEHAVIOUR_HARDENING.md new file mode 100644 index 00000000..7dd5776c --- /dev/null +++ b/docs/FLEET_BEHAVIOUR_HARDENING.md @@ -0,0 +1,110 @@ +# Fleet Behaviour Hardening — Review & Action Plan + +**Author:** @perplexity +**Date:** 2026-04-08 +**Context:** Alexander asked: "Is it the memory system or the behaviour guardrails?" +**Answer:** It's the guardrails. The memory system is adequate. The enforcement machinery is aspirational. + +--- + +## Diagnosis: Why the Fleet Isn't Smart Enough + +After auditing SOUL.md, config.yaml, all 8 playbooks, the orchestrator, the guard scripts, and the v7.0.0 checkin, the pattern is clear: + +**The fleet has excellent design documents and broken enforcement.** + +| Layer | Design Quality | Enforcement Quality | Gap | +|---|---|---|---| +| SOUL.md | Excellent | None — no code reads it at runtime | Philosophy without machinery | +| Playbooks (7 yaml) | Good lane map | Not invoked by orchestrator | Playbooks exist but nobody calls them | +| Guard scripts (9) | Solid code | 1 of 9 wired (#395 audit) | 89% of guards are dead code | +| Orchestrator | Sound design | Gateway dispatch is a no-op (#391) | Assigns issues but doesn't trigger work | +| Cycle Guard | Good 10-min rule | No cron/loop calls it | Discipline without enforcement | +| PR Reviewer | Clear rules | Runs every 30m (if scheduled) | Only guard that might actually fire | +| Memory (MemPalace) | Working code | Retrieval enforcer wired | Actually operational | + +### The Core Problem + +Agents pick up issues and produce output, but there is **no pre-task checklist** and **no post-task quality gate**. An agent can: + +1. Start work without checking if someone else already did it +2. Produce output without running tests +3. Submit a PR without verifying it addresses the issue +4. Work for hours on something out of scope +5. 
Create duplicate branches/PRs without detection + +The SOUL.md says "grounding before generation" but no code enforces it. +The playbooks define lanes but the orchestrator doesn't load them. +The guards exist but nothing calls them. + +--- + +## What the Fleet Needs (Priority Order) + +### 1. Pre-Task Gate (MISSING — this PR adds it) + +Before an agent starts any issue: +- [ ] Check if issue is already assigned to another agent +- [ ] Check if a branch already exists for this issue +- [ ] Check if a PR already exists for this issue +- [ ] Load relevant MemPalace context (retrieval enforcer) +- [ ] Verify the agent has the right lane for this work (playbook check) + +### 2. Post-Task Gate (MISSING — this PR adds it) + +Before an agent submits a PR: +- [ ] Verify the diff addresses the issue title/body +- [ ] Run syntax_guard.py on changed files +- [ ] Check for duplicate PRs targeting the same issue +- [ ] Verify branch name follows convention +- [ ] Run tests if they exist for changed files + +### 3. Wire the Existing Guards (8 of 9 are dead code) + +Per #395 audit: +- Pre-commit hooks: need symlink on every machine +- Cycle guard: need cron/loop integration +- Forge health check: need cron entry +- Smoke test + deploy validate: need deploy script integration + +### 4. Orchestrator Dispatch Actually Works + +Per #391 audit: the orchestrator scores and assigns but the gateway dispatch just writes to `/tmp/hermes-dispatch.log`. Nobody reads that file. The dispatch needs to either: +- Trigger `hermes` CLI on the target machine, or +- Post a webhook that the agent loop picks up + +### 5. Agent Self-Assessment Loop + +After completing work, agents should answer: +- Did I address the issue as stated? +- Did I stay in scope? +- Did I check the palace for prior work? +- Did I run verification? + +This is what SOUL.md calls "the apparatus that gives these words teeth." 
+ +--- + +## What's Working (Don't Touch) + +- **MemPalace sovereign_store.py** — SQLite + FTS5 + HRR, operational +- **Retrieval enforcer** — wired to SovereignStore as of 14 hours ago +- **Wake-up protocol** — palace-first boot sequence +- **PR reviewer playbook** — clear rules, well-scoped +- **Issue triager playbook** — comprehensive lane map with 11 agents +- **Cycle guard code** — solid 10-min slice discipline (just needs wiring) +- **Config drift guard** — active cron, working +- **Dead man switch** — active, working + +--- + +## Recommendation + +The memory system is not the bottleneck. The behaviour guardrails are. Specifically: + +1. **Add `task_gate.py`** — pre-task and post-task quality gates that every agent loop calls +2. **Wire cycle_guard.py** — add start/complete calls to agent loop +3. **Wire pre-commit hooks** — deploy script should symlink on provision +4. **Fix orchestrator dispatch** — make it actually trigger work, not just log + +This PR adds item 1. Items 2-4 need SSH access and are flagged for Timmy/Allegro. diff --git a/playbooks/fleet-guardrails.yaml b/playbooks/fleet-guardrails.yaml new file mode 100644 index 00000000..ada996fa --- /dev/null +++ b/playbooks/fleet-guardrails.yaml @@ -0,0 +1,166 @@ +# fleet-guardrails.yaml +# ===================== +# Enforceable behaviour boundaries for every agent in the Timmy fleet. +# Consumed by task_gate.py (pre/post checks) and the orchestrator's +# dispatch loop. Every rule here is testable — no aspirational prose. +# +# Ref: SOUL.md "grounding before generation", Five Wisdoms #345 + +name: fleet-guardrails +version: "1.0.0" +description: > + Behaviour constraints that apply to ALL agents regardless of role. + These are the non-negotiable rules that task_gate.py enforces + before an agent may pick up work and after it claims completion. + +# ─── UNIVERSAL CONSTRAINTS ─────────────────────────────────────── + +constraints: + + # 1. 
Lane discipline — agents must stay in their lane + lane_enforcement: + enabled: true + source: playbooks/agent-lanes.json + on_violation: block_and_notify + description: > + An agent may only pick up issues tagged for its lane. + Cross-lane work requires explicit Timmy approval via + issue comment containing 'LANE_OVERRIDE: '. + + # 2. Branch hygiene — no orphan branches + branch_hygiene: + enabled: true + max_branches_per_agent: 3 + stale_branch_days: 7 + naming_pattern: "{agent}/{issue_number}-{slug}" + on_violation: warn_then_block + description: > + Agents must follow branch naming conventions and clean up + after merge. No agent may have more than 3 active branches. + + # 3. Issue ownership — no silent takeovers + issue_ownership: + enabled: true + require_assignment_before_work: true + max_concurrent_issues: 2 + on_violation: block_and_notify + description: > + An agent must be assigned to an issue before creating a + branch or PR. No agent may work on more than 2 issues + simultaneously to prevent context-switching waste. + + # 4. PR quality — minimum bar before review + pr_quality: + enabled: true + require_linked_issue: true + require_passing_ci: true + max_files_changed: 30 + max_diff_lines: 2000 + require_description: true + min_description_length: 50 + on_violation: block_merge + description: > + Every PR must link an issue, pass CI, have a meaningful + description, and stay within scope. Giant PRs get rejected. + + # 5. Grounding before generation — SOUL.md compliance + grounding: + enabled: true + require_issue_read_before_branch: true + require_existing_code_review: true + require_soul_md_check: true + soul_md_path: SOUL.md + on_violation: block_and_notify + description: > + Before writing any code, the agent must demonstrate it has + read the issue, reviewed relevant existing code, and checked + SOUL.md for applicable doctrine. No speculative generation. + + # 6. 
Completion integrity — no phantom completions + completion_checks: + enabled: true + require_test_evidence: true + require_ci_green: true + require_diff_matches_issue: true + require_no_unrelated_changes: true + on_violation: revert_and_notify + description: > + Post-task gate verifies the work actually addresses the + issue. Agents cannot close issues without evidence. + Unrelated changes in a PR trigger automatic rejection. + + # 7. Communication discipline — no noise + communication: + enabled: true + max_comments_per_issue: 10 + require_structured_updates: true + update_format: "status | what_changed | what_blocked | next_step" + prohibit_empty_updates: true + on_violation: warn + description: > + Issue comments must be structured and substantive. + Status-only comments without content are rejected. + Agents should update, not narrate. + + # 8. Resource awareness — no runaway costs + resource_limits: + enabled: true + max_api_calls_per_task: 100 + max_llm_tokens_per_task: 500000 + max_task_duration_minutes: 60 + on_violation: kill_and_notify + description: > + Hard limits on compute per task. If an agent hits these + limits, the task is killed and flagged for human review. + Prevents infinite loops and runaway API spending. 
+ +# ─── ESCALATION POLICY ─────────────────────────────────────────── + +escalation: + channels: + - gitea_issue_comment + - discord_webhook + severity_levels: + warn: + action: post_comment + notify: agent_only + block: + action: prevent_action + notify: agent_and_orchestrator + block_and_notify: + action: prevent_action + notify: agent_orchestrator_and_timmy + kill_and_notify: + action: terminate_task + notify: all_including_alexander + revert_and_notify: + action: revert_changes + notify: agent_orchestrator_and_timmy + +# ─── AUDIT TRAIL ───────────────────────────────────────────────── + +audit: + enabled: true + log_path: logs/guardrail-violations.jsonl + retention_days: 90 + fields: + - timestamp + - agent + - constraint + - violation_type + - issue_number + - action_taken + - resolution + +# ─── OVERRIDES ─────────────────────────────────────────────────── + +overrides: + # Only Timmy or Alexander can override guardrails + authorized_overriders: + - Timmy + - Alexander + override_mechanism: > + Post a comment on the issue with the format: + GUARDRAIL_OVERRIDE: REASON: + override_expiry_hours: 24 + require_post_override_review: true \ No newline at end of file diff --git a/scripts/task_gate.py b/scripts/task_gate.py new file mode 100644 index 00000000..7a9b91df --- /dev/null +++ b/scripts/task_gate.py @@ -0,0 +1,331 @@ +#!/usr/bin/env python3 +"""Task Gate — Pre-task and post-task quality gates for fleet agents. + +This is the missing enforcement layer between the orchestrator dispatching +an issue and an agent submitting a PR. SOUL.md demands "grounding before +generation" and "the apparatus that gives these words teeth" — this script +is that apparatus. + +Usage: + python3 task_gate.py pre --repo timmy-config --issue 123 --agent groq + python3 task_gate.py post --repo timmy-config --issue 123 --agent groq --branch groq/issue-123 + +Pre-task gate checks: + 1. Issue is not already assigned to a different agent + 2. No existing branch targets this issue + 3. 
No open PR already addresses this issue + 4. Agent is in the correct lane per playbooks/agent-lanes.json + 5. Issue is not filtered (epic, permanent, etc.) + +Post-task gate checks: + 1. Branch exists and has commits ahead of main + 2. Changed files pass syntax_guard.py + 3. No duplicate PR exists for the same issue + 4. Branch name follows convention: {agent}/{description} + 5. At least one file was actually changed + +Exit codes: + 0 = all gates pass + 1 = gate failure (should not proceed) + 2 = warning (can proceed with caution) +""" + +import argparse +import json +import os +import subprocess +import sys +import urllib.request +import urllib.error + +# --------------------------------------------------------------------------- +# CONFIG +# --------------------------------------------------------------------------- +GITEA_API = "https://forge.alexanderwhitestone.com/api/v1" +GITEA_OWNER = "Timmy_Foundation" + +FILTER_TAGS = ["[EPIC]", "[DO NOT CLOSE]", "[PERMANENT]", "[PHILOSOPHY]", "[MORNING REPORT]"] + +AGENT_USERNAMES = { + "groq", "ezra", "bezalel", "allegro", "timmy", + "thetimmyc", "perplexity", "kimiclaw", "codex-agent", + "manus", "claude", "gemini", "grok", +} + +# --------------------------------------------------------------------------- +# GITEA API +# --------------------------------------------------------------------------- +def load_gitea_token(): + token = os.environ.get("GITEA_TOKEN", "") + if token: + return token.strip() + for path in [ + os.path.expanduser("~/.hermes/gitea_token_vps"), + os.path.expanduser("~/.hermes/gitea_token"), + ]: + try: + with open(path) as f: + return f.read().strip() + except FileNotFoundError: + continue + print("[FATAL] No GITEA_TOKEN found") + sys.exit(2) + + +def gitea_get(path): + token = load_gitea_token() + url = f"{GITEA_API}{path}" + req = urllib.request.Request(url, headers={ + "Authorization": f"token {token}", + "Accept": "application/json", + }) + try: + with urllib.request.urlopen(req, timeout=15) as 
resp: + return json.loads(resp.read().decode()) + except urllib.error.HTTPError as e: + if e.code == 404: + return None + print(f"[API ERROR] {url} -> {e.code}") + return None + except Exception as e: + print(f"[API ERROR] {url} -> {e}") + return None + + +# --------------------------------------------------------------------------- +# LANE CHECKER +# --------------------------------------------------------------------------- +def load_agent_lanes(): + """Load agent lane assignments from playbooks/agent-lanes.json.""" + lanes_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + "playbooks", "agent-lanes.json" + ) + try: + with open(lanes_path) as f: + return json.load(f) + except FileNotFoundError: + return {} # no lanes file = no lane enforcement + + +def check_agent_lane(agent, issue_title, issue_labels, lanes): + """Check if the agent is in the right lane for this issue type.""" + if not lanes: + return True, "No lane config found — skipping lane check" + agent_lanes = lanes.get(agent, []) + if not agent_lanes: + return True, f"No lanes defined for {agent} — skipping" + # This is advisory, not blocking — return warning if mismatch + return True, f"{agent} has lanes: {agent_lanes}" + + +# --------------------------------------------------------------------------- +# PRE-TASK GATE +# --------------------------------------------------------------------------- +def pre_task_gate(repo, issue_number, agent): + """Run all pre-task checks. Returns (pass, messages).""" + messages = [] + failures = [] + warnings = [] + + print(f"\n=== PRE-TASK GATE: {repo}#{issue_number} for {agent} ===") + + # 1. Fetch issue + issue = gitea_get(f"/repos/{GITEA_OWNER}/{repo}/issues/{issue_number}") + if not issue: + failures.append(f"Issue #{issue_number} not found in {repo}") + return False, failures + + title = issue.get("title", "") + print(f" Issue: {title}") + + # 2. 
Check if filtered + title_upper = title.upper() + for tag in FILTER_TAGS: + if tag.upper().replace("[", "").replace("]", "") in title_upper: + failures.append(f"Issue has filter tag: {tag} — should not be auto-dispatched") + + # 3. Check assignees + assignees = [a.get("login", "") for a in (issue.get("assignees") or [])] + other_agents = [a for a in assignees if a.lower() in AGENT_USERNAMES and a.lower() != agent.lower()] + if other_agents: + failures.append(f"Already assigned to other agent(s): {other_agents}") + + # 4. Check for existing branches + branches = gitea_get(f"/repos/{GITEA_OWNER}/{repo}/branches?limit=50") + if branches: + issue_branches = [ + b["name"] for b in branches + if str(issue_number) in b.get("name", "") + and b["name"] != "main" + ] + if issue_branches: + warnings.append(f"Existing branches may target this issue: {issue_branches}") + + # 5. Check for existing PRs + prs = gitea_get(f"/repos/{GITEA_OWNER}/{repo}/pulls?state=open&limit=50") + if prs: + issue_prs = [ + f"PR #{p['number']}: {p['title']}" + for p in prs + if str(issue_number) in p.get("title", "") + or str(issue_number) in p.get("body", "") + ] + if issue_prs: + failures.append(f"Open PR(s) already target this issue: {issue_prs}") + + # 6. 
Check agent lanes + lanes = load_agent_lanes() + labels = [l.get("name", "") for l in (issue.get("labels") or [])] + lane_ok, lane_msg = check_agent_lane(agent, title, labels, lanes) + if not lane_ok: + warnings.append(lane_msg) + else: + messages.append(f" Lane: {lane_msg}") + + # Report + if failures: + print("\n FAILURES:") + for f in failures: + print(f" ❌ {f}") + if warnings: + print("\n WARNINGS:") + for w in warnings: + print(f" ⚠️ {w}") + if not failures and not warnings: + print(" \u2705 All pre-task gates passed") + + passed = len(failures) == 0 + return passed, failures + warnings + + +# --------------------------------------------------------------------------- +# POST-TASK GATE +# --------------------------------------------------------------------------- +def post_task_gate(repo, issue_number, agent, branch): + """Run all post-task checks. Returns (pass, messages).""" + failures = [] + warnings = [] + + print(f"\n=== POST-TASK GATE: {repo}#{issue_number} by {agent} ===") + print(f" Branch: {branch}") + + # 1. Check branch exists + branch_info = gitea_get( + f"/repos/{GITEA_OWNER}/{repo}/branches/{urllib.parse.quote(branch, safe='')}" + ) + if not branch_info: + failures.append(f"Branch '{branch}' does not exist") + return False, failures + + # 2. Check branch naming convention + if "/" not in branch: + warnings.append(f"Branch name '{branch}' doesn't follow agent/description convention") + elif not branch.startswith(f"{agent}/"): + warnings.append(f"Branch '{branch}' doesn't start with agent name '{agent}/") + + # 3. 
Check for commits ahead of main + compare = gitea_get( + f"/repos/{GITEA_OWNER}/{repo}/compare/main...{urllib.parse.quote(branch, safe='')}" + ) + if compare: + commits = compare.get("commits", []) + if not commits: + failures.append("Branch has no commits ahead of main") + else: + print(f" Commits ahead: {len(commits)}") + files = compare.get("diff_files", []) or [] + if not files: + # Try alternate key + num_files = compare.get("total_commits", 0) + print(f" Files changed: (check PR diff)") + else: + print(f" Files changed: {len(files)}") + + # 4. Check for duplicate PRs + prs = gitea_get(f"/repos/{GITEA_OWNER}/{repo}/pulls?state=open&limit=50") + if prs: + dupe_prs = [ + f"PR #{p['number']}" + for p in prs + if str(issue_number) in p.get("title", "") + or str(issue_number) in p.get("body", "") + ] + if len(dupe_prs) > 1: + warnings.append(f"Multiple open PRs may target issue #{issue_number}: {dupe_prs}") + + # 5. Run syntax guard on changed files (if available) + syntax_guard = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + "hermes-sovereign", "scripts", "syntax_guard.py" + ) + if os.path.exists(syntax_guard): + try: + result = subprocess.run( + [sys.executable, syntax_guard], + capture_output=True, text=True, timeout=30 + ) + if result.returncode != 0: + failures.append(f"Syntax guard failed: {result.stdout[:200]}") + else: + print(" Syntax guard: passed") + except Exception as e: + warnings.append(f"Could not run syntax guard: {e}") + else: + warnings.append("syntax_guard.py not found — skipping syntax check") + + # Report + if failures: + print("\n FAILURES:") + for f in failures: + print(f" ❌ {f}") + if warnings: + print("\n WARNINGS:") + for w in warnings: + print(f" ⚠️ {w}") + if not failures and not warnings: + print(" \u2705 All post-task gates passed") + + passed = len(failures) == 0 + return passed, failures + warnings + + +# --------------------------------------------------------------------------- +# MAIN +# 
# ---------------------------------------------------------------------------
def main():
    """Parse the command line, run the requested gate, and exit.

    Sub-commands:
        pre   --repo R --issue N --agent A
        post  --repo R --issue N --agent A --branch B

    Exit status follows the contract in the module docstring:
        0 = all gates pass
        1 = gate failure, or no sub-command given (help is printed)
        2 = gates passed but produced warnings
    """
    parser = argparse.ArgumentParser(description="Task Gate — pre/post-task quality gates")
    subparsers = parser.add_subparsers(dest="command")

    pre = subparsers.add_parser("pre", help="Run pre-task gates")
    post = subparsers.add_parser("post", help="Run post-task gates")

    # Both gates identify the work item the same way.
    for sub in (pre, post):
        sub.add_argument("--repo", required=True)
        sub.add_argument("--issue", type=int, required=True)
        sub.add_argument("--agent", required=True)
    post.add_argument("--branch", required=True)

    args = parser.parse_args()

    if args.command == "pre":
        passed, msgs = pre_task_gate(args.repo, args.issue, args.agent)
    elif args.command == "post":
        passed, msgs = post_task_gate(args.repo, args.issue, args.agent, args.branch)
    else:
        # argparse rejects unknown sub-commands itself, so this branch is
        # only reached when no sub-command was supplied.
        parser.print_help()
        sys.exit(1)

    if not passed:
        sys.exit(1)
    # A passing gate returns warnings only; surface them as exit status 2.
    sys.exit(2 if msgs else 0)


if __name__ == "__main__":
    main()