## Summary

Implements forge cleanup tools and documentation as requested in issue #1128.

## Changes

- scripts/cleanup-duplicate-prs.sh: Automated duplicate PR detection
- docs/forge-cleanup-analysis.md: Analysis of duplicate PRs
- docs/forge-cleanup-report.md: Cleanup report with metrics
- .github/workflows/pr-duplicate-check.yml: Weekly automated checks

Issue: #1128
171 lines
6.1 KiB
Bash
Executable File
171 lines
6.1 KiB
Bash
Executable File
#!/usr/bin/env bash
# ═══════════════════════════════════════════════════════════════
# cleanup-duplicate-prs.sh — Identify and close duplicate open PRs
#
# This script identifies PRs that are duplicates (their titles
# reference the same issue number, e.g. "#123") and closes the
# older ones, keeping the newest PR of each group.
#
# Usage:
#   ./scripts/cleanup-duplicate-prs.sh [--dry-run] [--close]
#
# Options:
#   --dry-run   Show what would be done without making changes
#   --close     Actually close duplicate PRs (default is dry-run)
#
# Environment:
#   GITEA_TOKEN  API token (required)
#   GITEA_URL    Base URL of the Gitea instance (has a default)
#   REPO         Repository as "owner/name" (has a default)
#   DRY_RUN      "true" (default) or "false"; flags override it
#
# Requires: bash, curl, jq
#
# Designed for issue #1128: Forge Cleanup
# ═══════════════════════════════════════════════════════════════

# Abort on unhandled errors, on unset variables, and when any stage
# of a pipeline fails.
set -euo pipefail

# ─── Configuration ──────────────────────────────────────────
# Every setting can be overridden from the environment.

# Base URL of the Gitea instance.
GITEA_URL="${GITEA_URL:-https://forge.alexanderwhitestone.com}"

# API token. There is no default: the script aborts here with the
# message below when the variable is unset or empty.
GITEA_TOKEN="${GITEA_TOKEN:?Set GITEA_TOKEN env var}"

# Repository to inspect, in "owner/name" form.
REPO="${REPO:-Timmy_Foundation/the-nexus}"

#######################################
# Map a user-supplied DRY_RUN value onto exactly "true" or "false".
# Fails safe: only an explicit falsy spelling selects "false"
# (destructive mode); anything else, including "1", "yes" or a typo,
# selects "true".
# Arguments: $1 - raw value
# Outputs:   "true" or "false" on stdout
#######################################
normalize_dry_run() {
  case "${1:-}" in
    false | FALSE | False | 0 | no | NO | off | OFF) printf 'false\n' ;;
    *) printf 'true\n' ;;
  esac
}

# Dry-run is the default; the DRY_RUN env var may override it.
DRY_RUN="$(normalize_dry_run "${DRY_RUN:-true}")"

# Print a short usage summary to stdout.
usage() {
  printf 'Usage: %s [--dry-run] [--close]\n' "${0##*/}"
  printf '  --dry-run   Show what would be done without making changes\n'
  printf '  --close     Actually close duplicate PRs (default is dry-run)\n'
}

#######################################
# Parse command line arguments.
# Globals:   DRY_RUN (written)
# Arguments: the script's positional parameters
# Returns:   exits 0 after --help, exits 2 on an unknown option
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --dry-run)
        DRY_RUN="true"
        ;;
      --close)
        DRY_RUN="false"
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        # A mistyped flag must not be ignored silently: the caller
        # would otherwise run in a mode they did not ask for.
        printf 'Unknown option: %s\n' "$arg" >&2
        usage >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"

# REST API root. A trailing slash on GITEA_URL is dropped so request
# paths never contain "//".
API="${GITEA_URL%/}/api/v1"

# Complete header line handed to `curl -H`. It must carry the header
# name: a bare "token <value>" has no "Name: value" shape, so no
# Authorization header would reach the server.
AUTH="Authorization: token $GITEA_TOKEN"

#######################################
# Print a message prefixed with a UTC ISO-8601 timestamp.
# Arguments: message words (joined with spaces)
# Outputs:   one line on stdout
#######################################
log() {
  local stamp
  stamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)
  printf '[%s] %s\n' "$stamp" "$*"
}

# ─── Fetch open PRs ────────────────────────────────────────
log "Checking open PRs for $REPO (dry_run: $DRY_RUN)"

# The listing is paginated, so request pages until an empty one comes
# back. The loop does not stop on a "short" page because the server
# may cap the page size below PAGE_SIZE.
PAGE_SIZE=50
MAX_PAGES=200 # hard stop in case the server ignores the page parameter
OPEN_PRS='[]'
page=1

while ((page <= MAX_PAGES)); do
  # --fail turns HTTP errors (401, 404, 5xx) into a non-zero exit
  # status instead of handing an error document to jq.
  if ! page_json=$(curl -sS --fail -H "$AUTH" \
    "$API/repos/$REPO/pulls?state=open&limit=$PAGE_SIZE&page=$page"); then
    log "ERROR: failed to fetch open PRs from $API (page $page)"
    exit 1
  fi

  # Anything other than a JSON array is an unexpected response.
  if ! page_len=$(jq 'if type == "array" then length else error("not an array") end' \
    <<<"$page_json" 2>/dev/null); then
    log "ERROR: unexpected API response while listing open PRs (page $page)"
    exit 1
  fi

  if ((page_len == 0)); then
    break
  fi

  # Merge the page into the accumulated list. Duplicates (a PR that
  # moved between pages while paging) are dropped by PR number.
  OPEN_PRS=$(jq -s 'add | unique_by(.number)' <<<"$OPEN_PRS $page_json")
  page=$((page + 1))
done

# Count PRs
PR_COUNT=$(jq 'length' <<<"$OPEN_PRS")
log "Found $PR_COUNT open PRs"

if ((PR_COUNT == 0)); then
  log "No open PRs to process"
  exit 0
fi

# ─── Extract issue numbers from PR titles ──────────────────
# Scratch directory holding one file per referenced issue. The trap
# calls a function so the path is expanded (and quoted) when the
# script exits, not when the trap is installed.
TEMP_DIR=$(mktemp -d)
cleanup_temp_dir() { rm -rf -- "${TEMP_DIR:?}"; }
trap cleanup_temp_dir EXIT

#######################################
# Group PRs by the first "#<digits>" reference in their title.
# Arguments: $1 - directory receiving the issue_<n>.txt files
# Input:     stdin, one PR per line, tab separated:
#            number, title, created_at, branch
# Outputs:   appends "number,created_at,branch" to
#            $1/issue_<n>.txt; PRs without a reference are skipped
#######################################
group_prs_by_issue() {
  local out_dir=$1
  local pr_number pr_title pr_created pr_branch issue_num

  while IFS=$'\t' read -r pr_number pr_title pr_created pr_branch; do
    # Extract issue number from title (look for #123 pattern)
    if [[ $pr_title =~ \#([0-9]+) ]]; then
      issue_num="${BASH_REMATCH[1]}"
      printf '%s,%s,%s\n' "$pr_number" "$pr_created" "$pr_branch" \
        >>"$out_dir/issue_$issue_num.txt"
    fi
  done
}

# @tsv escapes tabs and newlines inside titles, so a title can never
# spill into the neighbouring columns.
pr_rows=$(jq -r '.[] | [.number, .title, .created_at, .head.ref] | @tsv' \
  <<<"${OPEN_PRS:-[]}")

group_prs_by_issue "$TEMP_DIR" <<<"$pr_rows"

# ─── Identify and process duplicates ──────────────────────
DUPLICATES_FOUND=0
CLOSED_COUNT=0
CLOSE_FAILURES=0

#######################################
# Pick the newest PR from "number,created_at,branch" lines.
# Timestamps are compared as strings; when two are equal the higher
# PR number wins.
# Input:   stdin, one PR per line
# Outputs: the chosen PR number on stdout (empty line for no input)
#######################################
find_newest_pr() {
  local pr_num pr_date _branch
  local best_num="" best_date=""

  while IFS=',' read -r pr_num pr_date _branch || [[ -n "$pr_num" ]]; do
    [[ -n "$pr_num" ]] || continue
    if [[ -z "$best_num" || "$pr_date" > "$best_date" ]] ||
      { [[ "$pr_date" == "$best_date" ]] && ((pr_num > best_num)); }; then
      best_num=$pr_num
      best_date=$pr_date
    fi
  done

  printf '%s\n' "$best_num"
}

#######################################
# Comment on a duplicate PR, then close it.
# Globals:   AUTH, API, REPO (read)
# Arguments: $1 - PR to close, $2 - PR being kept, $3 - issue number
# Returns:   0 when both API calls succeed, 1 otherwise. The PR is
#            not closed when the explanatory comment cannot be posted.
#######################################
close_duplicate_pr() {
  local pr_num=$1 kept_pr=$2 issue_num=$3
  local comment_json

  # The interpolated values are PR and issue numbers, so the JSON
  # needs no escaping.
  comment_json=$(printf '{"body": "Closing as duplicate. PR #%s is newer and addresses the same issue (#%s)."}' \
    "$kept_pr" "$issue_num")

  # Add a comment explaining why we're closing
  curl -sS --fail -X POST -H "$AUTH" -H "Content-Type: application/json" \
    -d "$comment_json" "$API/repos/$REPO/issues/$pr_num/comments" \
    >/dev/null || return 1

  # Close the PR
  curl -sS --fail -X PATCH -H "$AUTH" -H "Content-Type: application/json" \
    -d '{"state": "closed"}' "$API/repos/$REPO/pulls/$pr_num" \
    >/dev/null || return 1
}

for issue_file in "$TEMP_DIR"/issue_*.txt; do
  # An unmatched glob stays literal; skip it.
  [[ -f "$issue_file" ]] || continue

  issue_num=${issue_file##*/issue_}
  issue_num=${issue_num%.txt}

  # Count PRs for this issue
  pr_count=$(grep -c '' "$issue_file" || true)

  if ((pr_count <= 1)); then
    continue # No duplicates
  fi

  log "Issue #$issue_num has $pr_count open PRs"
  DUPLICATES_FOUND=$((DUPLICATES_FOUND + 1))

  # Keep the newest PR, close the rest
  newest_pr=$(find_newest_pr <"$issue_file")
  log "Keeping PR #$newest_pr (newest)"

  # Walk the group oldest first.
  sorted_prs=$(sort -t',' -k2,2 -k1,1n "$issue_file")

  while IFS=',' read -r pr_num _ _; do
    if [[ "$pr_num" == "$newest_pr" ]]; then
      continue # Skip the newest PR
    fi

    log "Closing duplicate PR #$pr_num for issue #$issue_num"

    # Fail safe: only the literal value "false" permits changes.
    if [[ "${DRY_RUN:-true}" != "false" ]]; then
      log "DRY RUN: Would close PR #$pr_num"
    elif close_duplicate_pr "$pr_num" "$newest_pr" "$issue_num"; then
      log "Closed PR #$pr_num"
      CLOSED_COUNT=$((CLOSED_COUNT + 1))
    else
      log "ERROR: failed to close PR #$pr_num"
      CLOSE_FAILURES=$((CLOSE_FAILURES + 1))
    fi
  done <<<"$sorted_prs"
done

if ((CLOSE_FAILURES > 0)); then
  log "WARNING: $CLOSE_FAILURES duplicate PRs could not be closed"
fi

# ─── Summary ──────────────────────────────────────────────
# Report the totals gathered while processing duplicate groups.
log "Cleanup complete:"
log " Duplicate issue groups found: $DUPLICATES_FOUND"
log " PRs closed: $CLOSED_COUNT"
log " Dry run: $DRY_RUN"

if ((DUPLICATES_FOUND == 0)); then
  log "No duplicate PRs found"
fi

# ─── Additional cleanup: Stale PRs ────────────────────────
# Report PRs that were created more than 30 days ago. This section
# only logs them; nothing is closed. The filter looks at the creation
# time, not at the most recent activity.
log "Checking for stale PRs (older than 30 days)..."

# BSD date (macOS) takes -v-30d, GNU date takes -d. Try the BSD form
# first and fall back to the GNU form.
THIRTY_DAYS_AGO=$(date -u -v-30d +%Y-%m-%dT%H:%M:%SZ 2>/dev/null ||
  date -u -d "30 days ago" +%Y-%m-%dT%H:%M:%SZ)

# NOTE(review): created_at is compared to the cutoff as a string, which
# assumes the API reports timestamps in the same UTC "Z" form; confirm
# for servers configured with another time zone.
# The title is emitted last so a tab inside it cannot shift the
# created_at column when the line is split below.
STALE_PRS=$(jq -r --arg cutoff "$THIRTY_DAYS_AGO" \
  '.[] | select(.created_at < $cutoff) | "\(.number)\t\(.created_at)\t\(.title)"' \
  <<<"$OPEN_PRS")

if [[ -n "$STALE_PRS" ]]; then
  STALE_COUNT=$(grep -c '' <<<"$STALE_PRS" || true)
  log "Found $STALE_COUNT stale PRs (older than 30 days)"

  while IFS=$'\t' read -r pr_num pr_created pr_title; do
    log "Stale PR #$pr_num: $pr_title (created: $pr_created)"
  done <<<"$STALE_PRS"
else
  log "No stale PRs found"
fi

log "Script complete"