Compare commits

...

1 Commits

Author SHA1 Message Date
Alexander Whitestone
5d878179d1 feat: duplicate PR prevention system (#1480)
Some checks failed
CI / test (pull_request) Failing after 58s
Review Approval Gate / verify-review (pull_request) Failing after 9s
CI / validate (pull_request) Failing after 1m21s
Adds:
- scripts/preflight-pr-check.sh — blocks PR creation if duplicates exist
- scripts/cleanup-duplicate-prs.sh — closes duplicate PRs (keeps oldest)
- docs/duplicate-pr-prevention.md — usage and integration guide

Closes #1480
2026-04-14 22:29:51 -04:00
3 changed files with 231 additions and 146 deletions

View File

@@ -0,0 +1,72 @@
# Duplicate PR Prevention
## Problem
The burn loop creates duplicate PRs for the same issue because it doesn't check for existing PRs before creating new ones.
## Solution
Two scripts:
### 1. Preflight Check (`scripts/preflight-pr-check.sh`)
Run BEFORE creating a PR:
```bash
./scripts/preflight-pr-check.sh 1128
```
Output if PRs exist:
```
🚫 BLOCKED: 2 existing PR(s) for issue #1128
Existing PRs:
#1458: feat: Close duplicate PRs for issue #1128
Branch: dawn/1128-1776130053
URL: https://...
Options:
1. Review and merge an existing PR
2. Close duplicates and proceed
3. Use --force to bypass (NOT RECOMMENDED)
```
Exit code 0 = safe to proceed. Exit code 1 = blocked (existing PRs found). Exit code 2 = error (for example, no API token available).
### 2. Cleanup Script (`scripts/cleanup-duplicate-prs.sh`)
Close duplicate PRs:
```bash
# Dry run (show what would be closed)
./scripts/cleanup-duplicate-prs.sh 1128
# Actually close duplicates (keeps oldest)
./scripts/cleanup-duplicate-prs.sh 1128 --close
```
## Integration
### In burn loop
Add preflight check before PR creation:
```bash
# Before: git push && curl ... /pulls
./scripts/preflight-pr-check.sh $ISSUE_NUM || exit 1
```
### In CI
Add as a GitHub/Gitea Actions check:
```yaml
- name: Check for duplicate PRs
run: ./scripts/preflight-pr-check.sh ${{ github.event.issue.number }}
```
## Environment Variables
- `GITEA_TOKEN` — API token (default: reads from `~/.config/gitea/token`)
- `GITEA_URL` — Forge URL (default: `https://forge.alexanderwhitestone.com`)
- `GITEA_REPO` — Repository (default: `Timmy_Foundation/the-nexus`)

View File

@@ -1,170 +1,101 @@
#!/usr/bin/env bash
# ═══════════════════════════════════════════════════════════════
# cleanup-duplicate-prs.sh — Identify and close duplicate open PRs
#
# This script identifies PRs that are duplicates (same issue number
# or very similar titles) and closes the older ones.
# cleanup-duplicate-prs.sh — Close duplicate PRs for a given issue
#
# Usage:
# ./scripts/cleanup-duplicate-prs.sh [--dry-run] [--close]
# ./scripts/cleanup-duplicate-prs.sh <issue_number> [--close]
#
# Options:
# --dry-run Show what would be done without making changes
# --close Actually close duplicate PRs (default is dry-run)
#
# Designed for issue #1128: Forge Cleanup
# ═══════════════════════════════════════════════════════════════
# Without --close: dry run (show what would be closed)
# With --close: actually close the duplicates
set -euo pipefail
# ─── Configuration ──────────────────────────────────────────
ISSUE_NUM="${1:?Usage: cleanup-duplicate-prs.sh <issue_number> [--close]}"
CLOSE_MODE="${2:-}"
GITEA_URL="${GITEA_URL:-https://forge.alexanderwhitestone.com}"
GITEA_TOKEN="${GITEA_TOKEN:?Set GITEA_TOKEN env var}"
REPO="${REPO:-Timmy_Foundation/the-nexus}"
DRY_RUN="${DRY_RUN:-true}"
GITEA_TOKEN="${GITEA_TOKEN:-$(cat ~/.config/gitea/token 2>/dev/null || echo '')}"
REPO="${GITEA_REPO:-Timmy_Foundation/the-nexus}"
# Parse command line arguments
for arg in "$@"; do
case $arg in
--dry-run)
DRY_RUN="true"
;;
--close)
DRY_RUN="false"
;;
esac
done
if [ -z "$GITEA_TOKEN" ]; then
echo "ERROR: GITEA_TOKEN not set"
exit 1
fi
API="$GITEA_URL/api/v1"
AUTH="token $GITEA_TOKEN"
REPO_API="${GITEA_URL}/api/v1/repos/${REPO}"
log() { echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] $*"; }
# Fetch open PRs
PRS=$(curl -sf -H "Authorization: token ${GITEA_TOKEN}" "${REPO_API}/pulls?state=open&limit=50" 2>/dev/null || echo '[]')
# ─── Fetch open PRs ────────────────────────────────────────
log "Checking open PRs for $REPO (dry_run: $DRY_RUN)"
# Find matching PRs
#
# Reads the JSON array of open PRs from $PRS and stores in $MATCHES the JSON
# array of PRs that reference issue $ISSUE_NUM in their title, body, or head
# branch name.
#
# The Python program is single-quoted so bash never expands anything inside
# it; the issue number is passed as an argument instead of being spliced into
# the source text.
MATCHES=$(printf '%s\n' "$PRS" | python3 -c '
import json, re, sys

issue = sys.argv[1]
# "#<issue>" must not be followed by another digit, so #112 does not match #1128.
text_ref = re.compile(r"#" + re.escape(issue) + r"(?!\d)")
# In a branch name the number must stand alone between non-digit characters,
# so issue 13 does not match a timestamp such as 1776130053.
branch_ref = re.compile(r"(?<!\d)" + re.escape(issue) + r"(?!\d)")

matches = []
for pr in json.load(sys.stdin):
    # The API may return null for these fields; treat null as empty text.
    title = pr.get("title") or ""
    body = pr.get("body") or ""
    ref = (pr.get("head") or {}).get("ref") or ""
    if text_ref.search(title) or text_ref.search(body) or branch_ref.search(ref):
        matches.append(pr)
json.dump(matches, sys.stdout)
' "$ISSUE_NUM") || {
  # Keep the original fallback (no matches means nothing gets closed), but say
  # so instead of hiding the failure.
  echo "WARNING: could not parse the PR list; treating it as no matches" >&2
  MATCHES='[]'
}
OPEN_PRS=$(curl -s -H "$AUTH" "$API/repos/$REPO/pulls?state=open&limit=50")
COUNT=$(echo "$MATCHES" | python3 -c "import json,sys; print(len(json.load(sys.stdin)))" 2>/dev/null || echo '0')
if [ -z "$OPEN_PRS" ] || [ "$OPEN_PRS" = "null" ]; then
log "No open PRs found or API error"
if [ "$COUNT" -eq 0 ]; then
echo "No PRs found for issue #$ISSUE_NUM"
exit 0
fi
# Count PRs
PR_COUNT=$(echo "$OPEN_PRS" | jq length)
log "Found $PR_COUNT open PRs"
echo "Found $COUNT PR(s) for issue #$ISSUE_NUM:"
# Print one line per matching PR: number, title, and head branch.
# The Python program is single-quoted for bash so that the double quotes used
# inside it reach Python intact; inside a bash double-quoted string they would
# end the string early and truncate the program.
printf '%s\n' "$MATCHES" | python3 -c '
import json, sys

for pr in json.load(sys.stdin):
    number, title, branch = pr["number"], pr["title"], pr["head"]["ref"]
    print(f" #{number}: {title} [{branch}]")
'
if [ "$PR_COUNT" -eq 0 ]; then
log "No open PRs to process"
if [ "$COUNT" -le 1 ]; then
echo ""
echo "Only 1 PR found. No cleanup needed."
exit 0
fi
# ─── Extract issue numbers from PR titles ──────────────────
# Create a temporary file for PR data
TEMP_FILE=$(mktemp)
echo "$OPEN_PRS" | jq -r '.[] | "\(.number)\t\(.title)\t\(.created_at)\t\(.head.ref)"' > "$TEMP_FILE"
# Keep the oldest PR, close the rest
#
# Shows which PR would be kept and which would be closed. "Oldest" is decided
# by the lowest PR number. The Python program is single-quoted for bash so the
# dictionary keys keep their quotes (in a bash double-quoted string,
# keep["number"] would reach Python as keep[number]).
printf '%s\n' "$MATCHES" | python3 -c '
import json, sys

prs = sorted(json.load(sys.stdin), key=lambda p: p["number"])
keep, close = prs[0], prs[1:]
print("KEEP: #{}: {}".format(keep["number"], keep["title"]))
for pr in close:
    print("CLOSE: #{}: {}".format(pr["number"], pr["title"]))
'
# Group PRs by issue number using temporary files
TEMP_DIR=$(mktemp -d)
trap "rm -rf $TEMP_DIR" EXIT
while IFS=$'\t' read -r pr_number pr_title pr_created pr_branch; do
# Extract issue number from title (look for #123 pattern)
if [[ $pr_title =~ \#([0-9]+) ]]; then
issue_num="${BASH_REMATCH[1]}"
echo "$pr_number,$pr_created,$pr_branch" >> "$TEMP_DIR/issue_$issue_num.txt"
fi
done < "$TEMP_FILE"
rm -f "$TEMP_FILE"
# ─── Identify and process duplicates ──────────────────────
DUPLICATES_FOUND=0
CLOSED_COUNT=0
for issue_file in "$TEMP_DIR"/issue_*.txt; do
[ -f "$issue_file" ] || continue
issue_num=$(basename "$issue_file" .txt | sed 's/issue_//')
pr_list=$(cat "$issue_file")
# Count PRs for this issue
pr_count=$(echo -n "$pr_list" | grep -c '^' || true)
if [ "$pr_count" -le 1 ]; then
continue # No duplicates
fi
log "Issue #$issue_num has $pr_count open PRs"
DUPLICATES_FOUND=$((DUPLICATES_FOUND + 1))
# Sort by creation date (oldest first)
sorted_prs=$(echo -n "$pr_list" | sort -t',' -k2)
# Keep the newest PR, close the rest
newest_pr=""
newest_date=""
while IFS=',' read -r pr_num pr_date pr_branch; do
if [ -z "$newest_date" ] || [[ "$pr_date" > "$newest_date" ]]; then
newest_pr="$pr_num"
newest_date="$pr_date"
fi
done <<< "$sorted_prs"
log "Keeping PR #$newest_pr (newest)"
# Close older PRs
while IFS=',' read -r pr_num pr_date pr_branch; do
if [ "$pr_num" = "$newest_pr" ]; then
continue # Skip the newest PR
fi
log "Closing duplicate PR #$pr_num for issue #$issue_num"
if [ "$DRY_RUN" = "true" ]; then
log "DRY RUN: Would close PR #$pr_num"
else
# Add a comment explaining why we're closing
comment_body="Closing as duplicate. PR #$newest_pr is newer and addresses the same issue (#$issue_num)."
curl -s -X POST -H "$AUTH" -H "Content-Type: application/json" -d "{\"body\": \"$comment_body\"}" "$API/repos/$REPO/issues/$pr_num/comments" > /dev/null
# Close the PR
curl -s -X PATCH -H "$AUTH" -H "Content-Type: application/json" -d '{"state": "closed"}' "$API/repos/$REPO/pulls/$pr_num" > /dev/null
log "Closed PR #$pr_num"
CLOSED_COUNT=$((CLOSED_COUNT + 1))
fi
done <<< "$sorted_prs"
done
# ─── Summary ──────────────────────────────────────────────
log "Cleanup complete:"
log " Duplicate issue groups found: $DUPLICATES_FOUND"
log " PRs closed: $CLOSED_COUNT"
log " Dry run: $DRY_RUN"
if [ "$DUPLICATES_FOUND" -eq 0 ]; then
log "No duplicate PRs found"
if [ "$CLOSE_MODE" != "--close" ]; then ""
echo ""
echo "DRY RUN: Add --close to actually close duplicates"
exit 0
fi
# ─── Additional cleanup: Stale PRs ────────────────────────
# Check for PRs older than 30 days with no activity
log "Checking for stale PRs (older than 30 days)..."
# Close duplicates
#
# Sends PATCH {"state": "closed"} to $REPO_API/pulls/<number> for every
# matching PR except the one with the lowest number.
#
# The token and API base URL are handed to Python through its environment
# rather than spliced into the program text, and the program is single-quoted
# so bash leaves its quotes alone. If any PR cannot be closed the step exits
# non-zero, so under `set -e` the script stops instead of reporting a
# successful cleanup.
printf '%s\n' "$MATCHES" \
  | GITEA_TOKEN="$GITEA_TOKEN" REPO_API="$REPO_API" python3 -c '
import json, os, sys, urllib.request

token = os.environ["GITEA_TOKEN"]
api = os.environ["REPO_API"]
prs = sorted(json.load(sys.stdin), key=lambda p: p["number"])
payload = json.dumps({"state": "closed"}).encode()
headers = {"Authorization": "token " + token, "Content-Type": "application/json"}

failed = 0
for pr in prs[1:]:
    number = pr["number"]
    req = urllib.request.Request(
        "{}/pulls/{}".format(api, number),
        data=payload,
        headers=headers,
        method="PATCH",
    )
    try:
        with urllib.request.urlopen(req):
            pass
        print(f"Closed PR #{number}")
    except Exception as e:
        failed += 1
        print(f"Error closing #{number}: {e}", file=sys.stderr)

sys.exit(1 if failed else 0)
'
THIRTY_DAYS_AGO=$(date -u -v-30d +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date -u -d "30 days ago" +%Y-%m-%dT%H:%M:%SZ)
STALE_PRS=$(echo "$OPEN_PRS" | jq -r --arg cutoff "$THIRTY_DAYS_AGO" '.[] | select(.created_at < $cutoff) | "\(.number)\t\(.title)\t\(.created_at)"')
if [ -n "$STALE_PRS" ]; then
STALE_COUNT=$(echo -n "$STALE_PRS" | grep -c '^' || true)
log "Found $STALE_COUNT stale PRs (older than 30 days)"
echo "$STALE_PRS" | while IFS=$'\t' read -r pr_num pr_title pr_created; do
log "Stale PR #$pr_num: $pr_title (created: $pr_created)"
done
else
log "No stale PRs found"
fi
log "Script complete"
echo ""
echo "Cleanup complete."

82
scripts/preflight-pr-check.sh Executable file
View File

@@ -0,0 +1,82 @@
#!/usr/bin/env bash
# preflight-pr-check.sh — Prevent duplicate PRs before creating them
#
# Usage:
#   ./scripts/preflight-pr-check.sh <issue_number> [--force]
#
# Environment:
#   GITEA_TOKEN  API token (falls back to the contents of ~/.config/gitea/token)
#   GITEA_URL    Forge base URL (default: https://forge.alexanderwhitestone.com)
#   GITEA_REPO   Repository as owner/name (default: Timmy_Foundation/the-nexus)
#
# Exit codes:
#   0 = safe to proceed (no existing PRs, or --force given)
#   1 = BLOCKED (existing PRs found)
#   2 = error (bad usage, missing token or tool, API or parse failure)
set -euo pipefail

readonly PAGE_LIMIT=50   # page size requested from the API
readonly MAX_PAGES=100   # hard stop so a misbehaving server cannot loop us forever

# Results of collect_matches (set in the current shell, not via a subshell).
MATCH_COUNT=0
MATCH_DETAILS=""

#######################################
# Print an error to stderr and exit with the "error" status (2).
# Arguments: message words
#######################################
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 2
}

#######################################
# Scan one page of the open-PR list for references to an issue.
# Arguments: $1 - issue number (digits only)
# Input:     JSON array of PR objects on stdin
# Outputs:   first line "<prs_in_page> <matches_in_page>", then a
#            human-readable block for each matching PR
# Returns:   non-zero if stdin is not a JSON array
#######################################
scan_page() {
  # Single-quoted for bash: nothing is expanded inside the program, and the
  # issue number arrives as an argument rather than as spliced source text.
  python3 -c '
import json, re, sys

issue = sys.argv[1]
# "#<issue>" must not be followed by another digit, so #112 does not match #1128.
text_ref = re.compile(r"#" + re.escape(issue) + r"(?!\d)")
# In a branch name the number must stand alone between non-digit characters.
branch_ref = re.compile(r"(?<!\d)" + re.escape(issue) + r"(?!\d)")

prs = json.load(sys.stdin)
if not isinstance(prs, list):
    sys.exit("unexpected API response: expected a JSON array")

hits = []
for pr in prs:
    # The API may return null for these fields; treat null as empty text.
    title = pr.get("title") or ""
    body = pr.get("body") or ""
    ref = (pr.get("head") or {}).get("ref") or ""
    if text_ref.search(title) or text_ref.search(body) or branch_ref.search(ref):
        hits.append((pr["number"], title, ref, pr.get("html_url") or ""))

print(len(prs), len(hits))
for number, title, ref, url in hits:
    print(f" #{number}: {title}")
    print(f" Branch: {ref}")
    print(f" URL: {url}")
    print()
' "$1"
}

#######################################
# Walk every page of open PRs and collect those referencing the issue.
# Globals:   MATCH_COUNT, MATCH_DETAILS (written); PAGE_LIMIT, MAX_PAGES (read)
# Arguments: $1 - issue number, $2 - repository API base URL, $3 - API token
# Returns:   0 on success; 1 if a request or a response parse fails
#######################################
collect_matches() {
  local issue=$1 api=$2 token=$3
  local page=1 raw summary page_size page_hits

  MATCH_COUNT=0
  MATCH_DETAILS=""

  while (( page <= MAX_PAGES )); do
    if ! raw=$(curl -sf -H "Authorization: token ${token}" \
      "${api}/pulls?state=open&limit=${PAGE_LIMIT}&page=${page}"); then
      printf 'ERROR: failed to fetch open PRs (page %s) from %s\n' "$page" "$api" >&2
      return 1
    fi
    if ! summary=$(printf '%s\n' "$raw" | scan_page "$issue"); then
      printf 'ERROR: could not parse the PR list returned by %s\n' "$api" >&2
      return 1
    fi

    read -r page_size page_hits <<<"${summary%%$'\n'*}"
    # An empty page means we are past the last one.
    (( page_size > 0 )) || break

    if (( page_hits > 0 )); then
      MATCH_COUNT=$(( MATCH_COUNT + page_hits ))
      # Command substitution stripped the trailing blank line; put it back.
      MATCH_DETAILS+="${summary#*$'\n'}"$'\n\n'
    fi
    page=$(( page + 1 ))
  done
}

#######################################
# Entry point.
# Arguments: $1 - issue number (a leading "#" is accepted), $2 - optional --force
#######################################
main() {
  local issue="${1:-}"
  local force="${2:-}"

  [[ -n "$issue" ]] || die "Usage: preflight-pr-check.sh <issue_number> [--force]"
  issue="${issue#\#}"
  [[ "$issue" =~ ^[0-9]+$ ]] || die "issue number must be numeric, got: $1"

  command -v curl >/dev/null || die "curl is required"
  command -v python3 >/dev/null || die "python3 is required"

  local gitea_url="${GITEA_URL:-https://forge.alexanderwhitestone.com}"
  local repo="${GITEA_REPO:-Timmy_Foundation/the-nexus}"
  local token="${GITEA_TOKEN:-}"
  if [[ -z "$token" ]]; then
    token=$(cat ~/.config/gitea/token 2>/dev/null || echo '')
  fi
  [[ -n "$token" ]] || die "GITEA_TOKEN not set and ~/.config/gitea/token not found"

  local repo_api="${gitea_url%/}/api/v1/repos/${repo}"

  # Fail closed: if the check cannot be performed, do not claim it is safe.
  if ! collect_matches "$issue" "$repo_api" "$token"; then
    if [[ "$force" == "--force" ]]; then
      echo "⚠️ --force flag detected. Proceeding although the duplicate check could not run." >&2
      exit 0
    fi
    exit 2
  fi

  if (( MATCH_COUNT > 0 )); then
    echo "╔══════════════════════════════════════════════════════════════╗"
    echo "║ 🚫 BLOCKED: $MATCH_COUNT existing PR(s) for issue #$issue"
    echo "╚══════════════════════════════════════════════════════════════╝"
    echo ""
    echo "Existing PRs:"
    printf '%s' "$MATCH_DETAILS"
    echo "Options:"
    echo " 1. Review and merge an existing PR"
    echo " 2. Close duplicates and proceed"
    echo " 3. Use --force to bypass (NOT RECOMMENDED)"
    echo ""
    if [[ "$force" == "--force" ]]; then
      echo "⚠️ --force flag detected. Bypassing duplicate check."
      exit 0
    fi
    exit 1
  fi

  echo "✅ Safe to proceed: No existing PRs for issue #$issue"
  exit 0
}

main "$@"