Compare commits

..

1 Commits

Author SHA1 Message Date
Timmy
25b26c536d fix(#1480): Add duplicate PR prevention mechanism
Some checks failed
CI / test (pull_request) Failing after 51s
CI / validate (pull_request) Failing after 40s
Review Approval Gate / verify-review (pull_request) Failing after 6s
Agents keep creating duplicate PRs for the same issue (#1128
had 4+ duplicate PRs). This adds a mandatory preflight check.

Files:
  - scripts/pr-preflight-check.sh: bash preflight (exit 1 if duplicates)
  - scripts/pr_preflight_check.py: Python preflight (for agent workflows)
  - docs/DUPLICATE_PR_PREVENTION.md: usage documentation

Closes #1480, #1460, #1474
2026-04-14 18:59:02 -04:00
5 changed files with 212 additions and 147 deletions

View File

@@ -0,0 +1,50 @@
# Duplicate PR Prevention
## The Problem
Issue #1128 documented a cleanup of duplicate PRs. Agents then created
4+ duplicate PRs *for issue #1128 itself*. The irony was not lost on anyone.
See: #1449, #1460, #1474, #1480.
## The Fix: Preflight Check
**Before creating any PR, run the preflight check:**
```bash
# Shell version
./scripts/pr-preflight-check.sh <issue_number>
# Python version
python3 scripts/pr_preflight_check.py <issue_number>
```
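If open PRs that reference the issue are found, the script **exits with code 1**
and prints the conflicting PRs. DO NOT proceed to create a new PR.
Exit code 0 means no open PR references the issue. Exit code 2 signals a usage
error, such as a missing issue number.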
## Agent Workflow
```
1. Read issue
2. Clone repo
3. Implement fix
4. Commit
5. >>> RUN python3 scripts/pr_preflight_check.py <issue_number> <<<
6. If exit 0: no open PR references the issue — continue to step 8
7. If exit 1: STOP — review the existing PRs instead of opening a new one
8. Push and create PR (only if step 5 exited 0)
```
## What Happens If You Skip Step 5
You will create another duplicate PR. The cleanup script will find it.
Someone will close it. You will have wasted compute and created noise.
## Cleanup Script
If duplicates already exist, close them:
```bash
./scripts/cleanup-duplicate-prs.sh --dry-run # preview
./scripts/cleanup-duplicate-prs.sh --close # actually close
```

View File

@@ -1,24 +0,0 @@
# PR Backlog Report — Timmy_Foundation/timmy-config
Generated: 2026-04-14 23:23:33
## Summary
- **Total Open PRs**: 50
- **Stale (>30 days)**: 0
- **Recent (<7 days)**: 50
## Recommendations
### Immediate Actions
1. **Review stale PRs**: 0 PRs are >30 days old
2. **Close duplicates**: Check for duplicate PRs on same issues
3. **Assign reviewers**: Ensure each PR has a reviewer
### Process Improvements
1. **Set SLAs**: Review within 48 hours, merge within 7 days
2. **Weekly cleanup**: Run this analyzer weekly
3. **Automate**: Add CI checks to prevent backlog
## Stale PRs (>30 days)

View File

@@ -1,123 +0,0 @@
#!/usr/bin/env python3
"""
PR Backlog Analyzer for timmy-config
Analyzes open PRs and provides recommendations for cleanup.
Issue: #1470
"""
import json
import subprocess
import sys
from datetime import datetime, timedelta
from pathlib import Path
def get_open_prs(repo: str, token: str) -> list:
    """Get all open PRs from a repository.

    Shells out to ``curl`` against the forge's ``/pulls`` endpoint and decodes
    the JSON response.

    Args:
        repo: Repository in ``owner/name`` form, interpolated into the URL.
        token: API token sent in the ``Authorization`` header.

    Returns:
        The decoded list of pull-request objects. An empty list is returned
        (after printing an error) when curl cannot be started, curl exits
        non-zero, the server answers with an HTTP error status, or the body
        is not a JSON list.
    """
    command = [
        "curl",
        "-s",
        # -S keeps curl's own error message on stderr despite -s, and --fail
        # turns HTTP >= 400 into a non-zero exit code. Without --fail an
        # error body (e.g. an "invalid token" JSON object) would be parsed
        # and returned as if it were the PR list.
        "-S",
        "--fail",
        "-H", f"Authorization: token {token}",
        f"https://forge.alexanderwhitestone.com/api/v1/repos/{repo}/pulls?state=open&limit=100",
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError as exc:
        # curl is not installed or cannot be executed.
        print(f"Error fetching PRs: {exc}")
        return []
    if result.returncode != 0:
        print(f"Error fetching PRs: {result.stderr}")
        return []
    try:
        prs = json.loads(result.stdout)
    except json.JSONDecodeError as exc:
        print(f"Error fetching PRs: invalid JSON response ({exc})")
        return []
    if not isinstance(prs, list):
        # Callers index every element as a PR dict, so anything but a list
        # is treated as a failed fetch.
        print(f"Error fetching PRs: unexpected response: {prs!r}")
        return []
    return prs
def analyze_pr(pr: dict) -> dict:
    """Analyze a single PR.

    Args:
        pr: One pull-request object as decoded from the API response. The
            keys ``number``, ``title``, ``head.ref``, ``created_at``,
            ``user.login`` and ``html_url`` are read; ``labels`` is optional.

    Returns:
        A flat dict with ``number``, ``title``, ``branch``, ``created``,
        ``age_days``, ``user``, ``labels`` (list of label names) and ``url``.

    Raises:
        KeyError: If one of the required keys is missing.
        ValueError: If ``created_at`` is not an ISO-8601 timestamp.
    """
    # fromisoformat() on older Python versions rejects the "Z" suffix, so it
    # is rewritten as an explicit UTC offset first.
    created = datetime.fromisoformat(pr['created_at'].replace('Z', '+00:00'))
    # "now" is taken in the timestamp's own timezone so the subtraction never
    # mixes naive and aware datetimes.
    age_days = (datetime.now(created.tzinfo) - created).days
    # A JSON null for "labels" decodes to None, which .get()'s default does
    # not cover; "or []" handles both the missing and the null case.
    labels = [label['name'] for label in pr.get('labels') or []]
    return {
        'number': pr['number'],
        'title': pr['title'],
        'branch': pr['head']['ref'],
        'created': pr['created_at'],
        'age_days': age_days,
        'user': pr['user']['login'],
        'labels': labels,
        'url': pr['html_url'],
    }
def generate_report(repo: str, prs: list) -> str:
    """Generate a markdown report.

    Args:
        repo: Repository name shown in the report title.
        prs: Analyzed PR dicts; each must provide ``age_days``, and stale
            entries additionally ``number``, ``title``, ``user`` and ``url``.

    Returns:
        The report text: a summary with counts, fixed recommendations, and
        one entry per stale PR (older than 30 days), oldest first.
    """
    stale = [p for p in prs if p['age_days'] > 30]
    # The summary line is labelled "<7 days", so the bound is strict: a PR
    # whose age is 7 full days is no longer "recent".
    recent = [p for p in prs if p['age_days'] < 7]
    report = f"""# PR Backlog Report — {repo}
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
## Summary
- **Total Open PRs**: {len(prs)}
- **Stale (>30 days)**: {len(stale)}
- **Recent (<7 days)**: {len(recent)}
## Recommendations
### Immediate Actions
1. **Review stale PRs**: {len(stale)} PRs are >30 days old
2. **Close duplicates**: Check for duplicate PRs on same issues
3. **Assign reviewers**: Ensure each PR has a reviewer
### Process Improvements
1. **Set SLAs**: Review within 48 hours, merge within 7 days
2. **Weekly cleanup**: Run this analyzer weekly
3. **Automate**: Add CI checks to prevent backlog
## Stale PRs (>30 days)
"""
    # Build every entry first and join once instead of growing the string
    # with += inside the loop.
    entries = [
        f"- **#{pr['number']}**: {pr['title']}\n"
        f" - Age: {pr['age_days']} days\n"
        f" - Author: {pr['user']}\n"
        f" - URL: {pr['url']}\n\n"
        for pr in sorted(stale, key=lambda x: x['age_days'], reverse=True)
    ]
    return report + "".join(entries)
def main():
    """Main function.

    Reads the API token from ``~/.config/gitea/token``, fetches the open PRs
    of ``Timmy_Foundation/timmy-config``, and writes a dated markdown report
    to ``reports/pr-backlog-YYYYMMDD.md``. Exits with status 1 when no token
    is available; returns without writing anything when no PRs come back.
    """
    token_path = Path.home() / '.config' / 'gitea' / 'token'
    if not token_path.exists():
        print("Error: Gitea token not found")
        sys.exit(1)
    token = token_path.read_text(encoding="utf-8").strip()
    if not token:
        # An empty token file would only produce an authentication failure
        # further down; report it as the missing token it effectively is.
        print("Error: Gitea token not found")
        sys.exit(1)
    repo = "Timmy_Foundation/timmy-config"
    print(f"Fetching PRs for {repo}...")
    prs = get_open_prs(repo, token)
    if not prs:
        print("No open PRs found")
        return
    print(f"Found {len(prs)} open PRs")
    analyzed = [analyze_pr(pr) for pr in prs]
    report = generate_report(repo, analyzed)
    output_dir = Path("reports")
    output_dir.mkdir(exist_ok=True)
    report_file = output_dir / f"pr-backlog-{datetime.now().strftime('%Y%m%d')}.md"
    # The report contains non-ASCII text (the em dash in its title and
    # whatever appears in PR titles), so the encoding is pinned instead of
    # depending on the locale default.
    report_file.write_text(report, encoding="utf-8")
    stale_count = sum(1 for p in analyzed if p['age_days'] > 30)
    print(f"Report saved to: {report_file}")
    print(f"Total PRs: {len(prs)}")
    print(f"Stale (>30 days): {stale_count}")


if __name__ == "__main__":
    main()

70
scripts/pr-preflight-check.sh Executable file
View File

@@ -0,0 +1,70 @@
#!/usr/bin/env bash
# ═══════════════════════════════════════════════════════════════
# pr-preflight-check.sh — MUST run before creating any PR
#
# Checks for existing open PRs that reference the same issue.
# Refuses to proceed if duplicates exist.
#
# Usage:
#   ./scripts/pr-preflight-check.sh <issue_number>
#
# Environment:
#   GITEA_TOKEN  (required) API token
#   GITEA_URL    (optional) forge base URL
#   REPO         (optional) owner/name of the repository to check
#
# Exit codes:
#   0 — Safe to proceed (no existing PRs for this issue)
#   1 — BLOCKED (existing PRs found, do NOT create a new one)
#   2 — Error (missing/invalid args, missing token or jq, API failure)
#
# Issue #1480: This script exists because agents keep creating
# duplicate PRs for the same issue. Running this before `git push`
# or `curl ... /pulls` prevents the problem.
# ═══════════════════════════════════════════════════════════════
set -euo pipefail

ISSUE_NUM="${1:-}"
if [ -z "$ISSUE_NUM" ]; then
    echo "Usage: $0 <issue_number>"
    echo "Example: $0 1128"
    exit 2
fi

# Accept "#1128" as well as "1128". Anything non-numeric is rejected because
# the value becomes part of a regular expression below.
ISSUE_NUM="${ISSUE_NUM#\#}"
if ! [[ "$ISSUE_NUM" =~ ^[0-9]+$ ]]; then
    echo "Error: issue number must be a positive integer, got '$1'" >&2
    echo "Usage: $0 <issue_number>" >&2
    exit 2
fi

GITEA_URL="${GITEA_URL:-https://forge.alexanderwhitestone.com}"
# Checked explicitly: ${GITEA_TOKEN:?...} would abort with status 1, which
# callers interpret as "BLOCKED" rather than as an error.
if [ -z "${GITEA_TOKEN:-}" ]; then
    echo "Error: Set GITEA_TOKEN env var" >&2
    exit 2
fi
REPO="${REPO:-Timmy_Foundation/the-nexus}"
API="$GITEA_URL/api/v1"
AUTH="Authorization: token $GITEA_TOKEN"

if ! command -v jq >/dev/null 2>&1; then
    echo "Error: jq is required but not installed" >&2
    exit 2
fi

echo "═══ PR Preflight Check for Issue #$ISSUE_NUM ═══"
echo ""

# Walk every result page: the server caps the page size, so a single request
# can silently miss PRs once the backlog grows beyond one page.
MATCHES=""
PAGE=1
MAX_PAGES=50
while [ "$PAGE" -le "$MAX_PAGES" ]; do
    # --fail turns HTTP errors into a non-zero exit; -S keeps curl's message.
    if ! OPEN_PRS=$(curl -sS --fail --max-time 10 -H "$AUTH" \
        "$API/repos/$REPO/pulls?state=open&limit=100&page=$PAGE"); then
        echo "✗ Could not fetch PRs (API error)" >&2
        exit 2
    fi

    # Anything other than a JSON array (empty body, error object, HTML) is an
    # API failure, not "no duplicates".
    PAGE_LEN=$(jq 'if type == "array" then length else error("not a list") end' \
        <<<"$OPEN_PRS" 2>/dev/null) || PAGE_LEN=""
    if [ -z "$PAGE_LEN" ]; then
        echo "✗ Unexpected API response (expected a JSON list of PRs)" >&2
        exit 2
    fi
    if [ "$PAGE_LEN" -eq 0 ]; then
        break
    fi

    # Find PRs referencing this issue. Each alternative is parenthesised
    # because jq's `|` and `//` bind more loosely than `or`. The negative
    # lookahead stops "#148" from matching "#1480".
    if ! PAGE_MATCHES=$(jq -r --arg n "$ISSUE_NUM" '
        ("#" + $n + "(?![0-9])") as $pat
        | .[]
        | select(((.title // "") | test($pat)) or ((.body // "") | test($pat)))
        | " PR #\(.number): \(.title) [\(.head.ref)] (\(.created_at[:10]))"
    ' <<<"$OPEN_PRS"); then
        echo "✗ Could not parse PR list" >&2
        exit 2
    fi
    if [ -n "$PAGE_MATCHES" ]; then
        MATCHES="${MATCHES:+$MATCHES$'\n'}$PAGE_MATCHES"
    fi
    PAGE=$((PAGE + 1))
done

if [ -z "$MATCHES" ]; then
    echo "✓ No existing open PRs for issue #$ISSUE_NUM"
    echo "✓ Safe to proceed."
    exit 0
fi

echo "✗ BLOCKED — Found existing open PRs for issue #$ISSUE_NUM:"
echo ""
echo "$MATCHES"
echo ""
echo "═══════════════════════════════════════════════"
echo "DO NOT CREATE A NEW PR."
echo ""
echo "Options:"
echo " 1. Review and merge an existing PR"
echo " 2. Close duplicates first: ./scripts/cleanup-duplicate-prs.sh --close"
echo " 3. Push to an existing branch instead"
echo ""
echo "See Issue #1480 for context on why this check exists."
echo "═══════════════════════════════════════════════"
exit 1

View File

@@ -0,0 +1,92 @@
#!/usr/bin/env python3
"""
pr_preflight_check.py — Prevent duplicate PR creation.
Call before creating any PR:
python3 scripts/pr_preflight_check.py 1128
Returns exit code 0 if safe, 1 if blocked.
Designed for agent workflows — agents MUST call this before `curl ... /pulls`.
Issue #1480: The duplicate PR problem.
"""
import json
import os
import sys
import urllib.request
def _fetch_open_prs(gitea_url: str, repo: str, token: str, max_pages: int = 50) -> list:
    """Return every open PR of *repo*, following the API's ``page`` parameter.

    Raises:
        ValueError: If a page is not a JSON list of objects.
        OSError: On network or HTTP errors raised by ``urllib``.
    """
    prs = []
    seen = set()
    for page in range(1, max_pages + 1):
        url = f"{gitea_url}/api/v1/repos/{repo}/pulls?state=open&limit=100&page={page}"
        req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
        with urllib.request.urlopen(req, timeout=10) as resp:
            batch = json.loads(resp.read())
        if not isinstance(batch, list) or not all(isinstance(pr, dict) for pr in batch):
            raise ValueError(f"unexpected response: {batch!r}")
        # Stop on an empty page, and also when a page brings nothing new, so
        # a server that ignores ``page`` cannot make the same PRs pile up.
        fresh = [pr for pr in batch if pr.get("number") not in seen]
        if not fresh:
            break
        seen.update(pr.get("number") for pr in fresh)
        prs.extend(fresh)
    return prs


def check_existing_prs(issue_num: int, repo: str = None, token: str = None) -> dict:
    """Check for existing open PRs referencing an issue.

    Args:
        issue_num: Issue number to look for (as ``#<issue_num>``) in PR
            titles and bodies.
        repo: ``owner/name``; defaults to the ``REPO`` environment variable,
            then to ``Timmy_Foundation/the-nexus``.
        token: API token; defaults to the ``GITEA_TOKEN`` environment
            variable, then to the contents of ``~/.config/gitea/token``.

    Returns dict with:
        safe (bool): True if no duplicates found
        matches (list): List of PR dicts that reference the issue
        message (str): Human-readable status

    The check is best-effort: when no token is available or the API call
    fails, ``safe`` is True and ``message`` explains why nothing was checked.
    """
    import re  # local import: only this function needs it

    gitea_url = os.environ.get("GITEA_URL", "https://forge.alexanderwhitestone.com")
    token = token or os.environ.get("GITEA_TOKEN", "")
    repo = repo or os.environ.get("REPO", "Timmy_Foundation/the-nexus")
    if not token:
        token_path = os.path.expanduser("~/.config/gitea/token")
        try:
            # "with" closes the handle; a missing or unreadable file simply
            # leaves the token empty.
            with open(token_path, encoding="utf-8") as fh:
                token = fh.read().strip()
        except OSError:
            token = ""
    if not token:
        return {"safe": True, "matches": [], "message": "No token — cannot check"}
    try:
        prs = _fetch_open_prs(gitea_url, repo, token)
    except Exception as e:
        # Deliberate error boundary: any fetch/parse failure is reported
        # through the result instead of crashing the calling workflow.
        return {"safe": True, "matches": [], "message": f"API error: {e}"}
    # The lookahead keeps "#148" from matching inside "#1480".
    issue_ref = re.compile(rf"#{re.escape(str(issue_num))}(?!\d)")
    matches = []
    for pr in prs:
        title = pr.get("title") or ""
        body = pr.get("body") or ""
        if issue_ref.search(title) or issue_ref.search(body):
            matches.append({
                "number": pr.get("number"),
                "title": title,
                "branch": (pr.get("head") or {}).get("ref", ""),
                "created": (pr.get("created_at") or "")[:10],
            })
    if matches:
        lines = [f"BLOCKED: {len(matches)} existing PR(s) for issue #{issue_num}:"]
        lines.extend(
            f" PR #{m['number']}: {m['title']} [{m['branch']}] ({m['created']})"
            for m in matches
        )
        lines.append("")
        lines.append("DO NOT CREATE A NEW PR. Review existing ones first.")
        return {"safe": False, "matches": matches, "message": "\n".join(lines)}
    return {"safe": True, "matches": [], "message": f"✓ Safe: no open PRs for #{issue_num}"}
def _print_usage():
    """Print the command-line synopsis."""
    print("Usage: pr_preflight_check.py <issue_number> [repo]")
    print("Example: pr_preflight_check.py 1128")
    print(" pr_preflight_check.py 1339 Timmy_Foundation/the-nexus")


def main():
    """Command-line entry point.

    Reads the issue number (and an optional ``owner/name`` repo) from
    ``sys.argv``, runs the duplicate check and prints its message.

    Exit codes: 0 when the check reports safe, 1 when existing PRs block a
    new one, 2 on a usage error (missing or non-numeric issue number).
    """
    if len(sys.argv) < 2:
        _print_usage()
        sys.exit(2)
    # Accept "#1128" as well as "1128"; anything that is not a positive
    # integer is a usage error rather than an uncaught traceback.
    raw_issue = sys.argv[1].lstrip("#")
    try:
        issue_num = int(raw_issue)
    except ValueError:
        issue_num = 0
    if issue_num < 1:
        print(f"Error: invalid issue number: {sys.argv[1]!r}")
        _print_usage()
        sys.exit(2)
    repo = sys.argv[2] if len(sys.argv) > 2 else None
    result = check_existing_prs(issue_num, repo)
    print(result["message"])
    if not result["safe"]:
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()