fix(#1480 ): Add duplicate PR prevention mechanism

Agents keep creating duplicate PRs for the same issue (#1128 had 4+ duplicate PRs). This adds a mandatory preflight check. Files: - scripts/pr-preflight-check.sh: bash preflight (exit 1 if duplicates) - scripts/pr_preflight_check.py: Python preflight (for agent workflows) - docs/DUPLICATE_PR_PREVENTION.md: usage documentation Closes #1480, #1460, #1474
2026-04-14 18:59:02 -04:00
6 changed files with 212 additions and 515 deletions
--- a/.githooks/commit-msg
+++ b/.githooks/commit-msg
@@ -1,49 +0,0 @@
-#!/usr/bin/env bash
-# Commit-msg hook: warn about shell injection risks
-# Install: cp .githooks/commit-msg .git/hooks/commit-msg && chmod +x .git/hooks/commit-msg
-
-COMMIT_MSG_FILE="$1"
-COMMIT_MSG=$(cat "$COMMIT_MSG_FILE")
-
-# Check for dangerous patterns
-DANGEROUS_PATTERNS=(
-    '`'           # Backticks
-    '$('          # Command substitution
-    '${'          # Variable expansion
-    '\\`'         # Escaped backticks
-    'eval '       # eval command
-    'exec '       # exec command
-    'source '     # source command
-    '|'           # Pipe
-    '&&'          # AND operator
-    '||'          # OR operator
-    ';'           # Semicolon
-    '>'           # Redirect
-    '<'           # Input redirect
-)
-
-FOUND_ISSUES=()
-for pattern in "${DANGEROUS_PATTERNS[@]}"; do
-    if echo "$COMMIT_MSG" | grep -q "$pattern"; then
-        FOUND_ISSUES+=("$pattern")
-    fi
-done
-
-if [ ${#FOUND_ISSUES[@]} -gt 0 ]; then
-    echo "⚠️  WARNING: Commit message contains potentially dangerous patterns:"
-    for issue in "${FOUND_ISSUES[@]}"; do
-        echo "  - $issue"
-    done
-    echo ""
-    echo "This could trigger shell execution during git operations."
-    echo ""
-    echo "Safe alternatives:"
-    echo "  1. Use: git commit -F <file> instead of git commit -m"
-    echo "  2. Escape special characters in commit messages"
-    echo "  3. Use the safe_commit() function from bin/safe_commit.py"
-    echo ""
-    echo "To proceed anyway, use: git commit --no-verify"
-    exit 1
-fi
-
-exit 0
--- a/bin/safe_commit.py
+++ b/bin/safe_commit.py
@@ -1,307 +0,0 @@
-#!/usr/bin/env python3
-"""
-Safe commit message handler to prevent shell injection.
-
-Issue #1430: [IMPROVEMENT] memory_mine.py ran during git commit — shell injection from commit message
-
-This script provides safe ways to commit with code-containing messages.
-"""
-
-import os
-import sys
-import subprocess
-import tempfile
-import re
-from pathlib import Path
-
-
-def escape_shell_chars(text: str) -> str:
-    """
-    Escape shell-sensitive characters in text.
-    
-    This prevents shell injection when text is used in shell commands.
-    """
-    # Characters that need escaping in shell
-    shell_chars = ['$', '`', '\\', '"', "'", '!', '(', ')', '{', '}', '[', ']', 
-                   '|', '&', ';', '<', '>', '*', '?', '~', '#']
-    
-    escaped = text
-    for char in shell_chars:
-        escaped = escaped.replace(char, '\\' + char)
-    
-    return escaped
-
-
-def safe_commit_message(message: str) -> str:
-    """
-    Create a safe commit message by escaping shell-sensitive characters.
-    
-    Args:
-        message: The commit message
-        
-    Returns:
-        Escaped commit message safe for shell use
-    """
-    return escape_shell_chars(message)
-
-
-def commit_with_file(message: str, branch: str = None) -> bool:
-    """
-    Commit using a temporary file instead of -m flag.
-    
-    This is the safest way to commit messages containing code or special characters.
-    
-    Args:
-        message: The commit message
-        branch: Optional branch name
-        
-    Returns:
-        True if successful, False otherwise
-    """
-    # Create temporary file for commit message
-    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
-        f.write(message)
-        temp_file = f.name
-    
-    try:
-        # Build git command
-        cmd = ['git', 'commit', '-F', temp_file]
-        if branch:
-            cmd.extend(['-b', branch])
-        
-        # Execute git commit
-        result = subprocess.run(cmd, capture_output=True, text=True)
-        
-        if result.returncode == 0:
-            print(f"✅ Committed successfully using file: {temp_file}")
-            return True
-        else:
-            print(f"❌ Commit failed: {result.stderr}")
-            return False
-            
-    finally:
-        # Clean up temporary file
-        try:
-            os.unlink(temp_file)
-        except:
-            pass
-
-
-def commit_safe(message: str, use_file: bool = True) -> bool:
-    """
-    Safely commit with a message.
-    
-    Args:
-        message: The commit message
-        use_file: If True, use -F <file> instead of -m
-        
-    Returns:
-        True if successful, False otherwise
-    """
-    if use_file:
-        return commit_with_file(message)
-    else:
-        # Use escaped message with -m flag
-        escaped_message = safe_commit_message(message)
-        cmd = ['git', 'commit', '-m', escaped_message]
-        
-        result = subprocess.run(cmd, capture_output=True, text=True)
-        
-        if result.returncode == 0:
-            print("✅ Committed successfully with escaped message")
-            return True
-        else:
-            print(f"❌ Commit failed: {result.stderr}")
-            return False
-
-
-def check_commit_message_safety(message: str) -> dict:
-    """
-    Check if a commit message contains potentially dangerous patterns.
-    
-    Args:
-        message: The commit message to check
-        
-    Returns:
-        Dictionary with safety analysis
-    """
-    dangerous_patterns = [
-        (r'`[^`]*`', 'Backticks (shell command substitution)'),
-        (r'\$\([^)]*\)', 'Command substitution $(...)'),
-        (r'\$\{[^}]*\}', 'Variable expansion ${...}'),
-        (r'\\`', 'Escaped backticks'),
-        (r'eval\s+', 'eval command'),
-        (r'exec\s+', 'exec command'),
-        (r'source\s+', 'source command'),
-        (r'\.\s+', 'dot command'),
-        (r'\|\s*', 'Pipe character'),
-        (r'&&', 'AND operator'),
-        (r'\|\|', 'OR operator'),
-        (r';', 'Semicolon (command separator)'),
-        (r'>', 'Redirect operator'),
-        (r'<', 'Input redirect'),
-    ]
-    
-    findings = []
-    for pattern, description in dangerous_patterns:
-        matches = re.findall(pattern, message)
-        if matches:
-            findings.append({
-                'pattern': pattern,
-                'description': description,
-                'matches': matches,
-                'count': len(matches)
-            })
-    
-    return {
-        'safe': len(findings) == 0,
-        'findings': findings,
-        'recommendation': 'Use commit_with_file() or escape_shell_chars()' if findings else 'Message appears safe'
-    }
-
-
-def create_commit_hook_guard():
-    """
-    Create a commit-msg hook that warns about dangerous patterns.
-    """
-    hook_content = '''#!/usr/bin/env bash
-# Commit-msg hook: warn about shell injection risks
-# Install: cp .githooks/commit-msg .git/hooks/commit-msg && chmod +x .git/hooks/commit-msg
-
-COMMIT_MSG_FILE="$1"
-COMMIT_MSG=$(cat "$COMMIT_MSG_FILE")
-
-# Check for dangerous patterns
-DANGEROUS_PATTERNS=(
-    '`'           # Backticks
-    '$('          # Command substitution
-    '${'          # Variable expansion
-    '\\`'         # Escaped backticks
-    'eval '       # eval command
-    'exec '       # exec command
-    'source '     # source command
-    '|'           # Pipe
-    '&&'          # AND operator
-    '||'          # OR operator
-    ';'           # Semicolon
-    '>'           # Redirect
-    '<'           # Input redirect
-)
-
-FOUND_ISSUES=()
-for pattern in "${DANGEROUS_PATTERNS[@]}"; do
-    if echo "$COMMIT_MSG" | grep -q "$pattern"; then
-        FOUND_ISSUES+=("$pattern")
-    fi
-done
-
-if [ ${#FOUND_ISSUES[@]} -gt 0 ]; then
-    echo "⚠️  WARNING: Commit message contains potentially dangerous patterns:"
-    for issue in "${FOUND_ISSUES[@]}"; do
-        echo "  - $issue"
-    done
-    echo ""
-    echo "This could trigger shell execution during git operations."
-    echo ""
-    echo "Safe alternatives:"
-    echo "  1. Use: git commit -F <file> instead of git commit -m"
-    echo "  2. Escape special characters in commit messages"
-    echo "  3. Use the safe_commit() function from bin/safe_commit.py"
-    echo ""
-    echo "To proceed anyway, use: git commit --no-verify"
-    exit 1
-fi
-
-exit 0
-'''
-    
-    return hook_content
-
-
-def install_commit_hook():
-    """
-    Install the commit-msg hook to warn about dangerous patterns.
-    """
-    hook_path = Path('.git/hooks/commit-msg')
-    hook_content = create_commit_hook_guard()
-    
-    # Check if .git/hooks exists
-    if not hook_path.parent.exists():
-        print("❌ .git/hooks directory not found")
-        return False
-    
-    # Write hook
-    with open(hook_path, 'w') as f:
-        f.write(hook_content)
-    
-    # Make executable
-    os.chmod(hook_path, 0o755)
-    
-    print(f"✅ Installed commit-msg hook to {hook_path}")
-    return True
-
-
-def main():
-    """Main entry point for safe commit tool."""
-    import argparse
-    
-    parser = argparse.ArgumentParser(description="Safe commit message handling")
-    parser.add_argument("--message", "-m", help="Commit message")
-    parser.add_argument("--file", "-F", help="Read commit message from file")
-    parser.add_argument("--check", action="store_true", help="Check message safety")
-    parser.add_argument("--install-hook", action="store_true", help="Install commit-msg hook")
-    parser.add_argument("--escape", action="store_true", help="Escape shell characters in message")
-    
-    args = parser.parse_args()
-    
-    if args.install_hook:
-        if install_commit_hook():
-            print("Commit hook installed successfully")
-        else:
-            print("Failed to install commit hook")
-            sys.exit(1)
-        return
-    
-    if args.check:
-        if args.message:
-            safety = check_commit_message_safety(args.message)
-            print(f"Message safety check:")
-            print(f"  Safe: {safety['safe']}")
-            print(f"  Recommendation: {safety['recommendation']}")
-            if safety['findings']:
-                print(f"  Findings:")
-                for finding in safety['findings']:
-                    print(f"    - {finding['description']}: {finding['count']} matches")
-        else:
-            print("Please provide a message with --message")
-        return
-    
-    if args.escape:
-        if args.message:
-            escaped = safe_commit_message(args.message)
-            print(f"Escaped message:")
-            print(escaped)
-        else:
-            print("Please provide a message with --message")
-        return
-    
-    if args.file:
-        # Read message from file
-        with open(args.file, 'r') as f:
-            message = f.read()
-        commit_with_file(message)
-    elif args.message:
-        # Check if message has dangerous patterns
-        safety = check_commit_message_safety(args.message)
-        if safety['safe']:
-            commit_safe(args.message, use_file=False)
-        else:
-            print("⚠️  Message contains potentially dangerous patterns")
-            print("Using file-based commit for safety...")
-            commit_safe(args.message, use_file=True)
-    else:
-        parser.print_help()
-
-
-if __name__ == "__main__":
-    main()
--- a/docs/DUPLICATE_PR_PREVENTION.md
+++ b/docs/DUPLICATE_PR_PREVENTION.md
@@ -0,0 +1,50 @@
+# Duplicate PR Prevention
+
+## The Problem
+
+Issue #1128 documented a cleanup of duplicate PRs. Agents then created
+4+ duplicate PRs *for issue #1128 itself*. The irony was not lost on anyone.
+
+See: #1449, #1460, #1474, #1480.
+
+## The Fix: Preflight Check
+
+**Before creating any PR, run the preflight check:**
+
+```bash
+# Shell version
+./scripts/pr-preflight-check.sh <issue_number>
+
+# Python version
+python3 scripts/pr_preflight_check.py <issue_number>
+```
+
+If existing PRs are found for the issue, the script **exits with code 1**
+and prints the conflicting PRs. DO NOT proceed to create a new PR.
+
+## Agent Workflow
+
+```
+1. Read issue
+2. Clone repo
+3. Implement fix
+4. Commit
+5. >>> RUN pr_preflight_check.py <issue_number> <<<
+6.   If exit 0: safe to push and create PR
+7.   If exit 1: STOP — review existing PRs first
+8. Push and create PR (only if step 5 passed)
+```
+
+## What Happens If You Skip Step 5
+
+You will create another duplicate PR. The cleanup script will find it.
+Someone will close it. You will have wasted compute and created noise.
+
+## Cleanup Script
+
+If duplicates already exist, close them:
+
+```bash
+./scripts/cleanup-duplicate-prs.sh --dry-run   # preview
+./scripts/cleanup-duplicate-prs.sh --close      # actually close
+```
--- a/docs/safe-commit-practices.md
+++ b/docs/safe-commit-practices.md
@@ -1,159 +0,0 @@
-# Safe Commit Practices
-
-**Issue:** #1430 - [IMPROVEMENT] memory_mine.py ran during git commit — shell injection from commit message
-
-## Problem
-
-During commit for #1124, the commit message contained Python code examples that triggered shell execution of memory_mine.py. The backtick-wrapped code in the commit message was interpreted by the shell during git commit processing.
-
-This is a potential vector for unintended code execution.
-
-## Safe Commit Methods
-
-### 1. Use `git commit -F <file>` (Recommended)
-
-The safest way to commit messages containing code or special characters:
-
-```bash
-# Create a file with your commit message
-echo "Fix: implement memory_mine.py with backtick example
-
-Example: \`python3 bin/memory_mine.py --days 7\`
-
-This commit adds memory mining functionality." > /tmp/commit-msg.txt
-
-# Commit using the file
-git commit -F /tmp/commit-msg.txt
-```
-
-### 2. Use the Safe Commit Tool
-
-```bash
-# Safe commit with automatic escaping
-python3 bin/safe_commit.py -m "Fix: implement memory_mine.py with backtick example"
-
-# Safe commit using file
-python3 bin/safe_commit.py -F /tmp/commit-msg.txt
-
-# Check if a message is safe
-python3 bin/safe_commit.py --check -m "Example: \`python3 bin/memory_mine.py\`"
-```
-
-### 3. Escape Shell Characters Manually
-
-If you must use `git commit -m`, escape special characters:
-
-```bash
-# Escape backticks and other shell characters
-git commit -m "Fix: implement memory_mine.py with backtick example
-
-Example: \\`python3 bin/memory_mine.py --days 7\\`
-
-This commit adds memory mining functionality."
-```
-
-## Dangerous Patterns to Avoid
-
-The following patterns in commit messages can trigger shell execution:
-
- **Backticks**: `` `command` `` → Executes command
- **Command substitution**: `$(command)` → Executes command
- **Variable expansion**: `${variable}` → Expands variable
- **Pipes**: `command1 | command2` → Pipes output
- **Operators**: `&&`, `||`, `;` → Command chaining
- **Redirects**: `>`, `<` → File operations
-
-## Installation
-
-### Install the Commit Hook
-
-To automatically warn about dangerous patterns:
-
-```bash
-# Install the commit-msg hook
-python3 bin/safe_commit.py --install-hook
-
-# Or manually
-cp .githooks/commit-msg .git/hooks/commit-msg
-chmod +x .git/hooks/commit-msg
-```
-
-### Configure Git Hooks Path
-
-If using the `.githooks` directory:
-
-```bash
-git config core.hooksPath .githooks
-```
-
-## Examples
-
-### ❌ Dangerous (Don't do this)
-
-```bash
-# This could trigger shell execution
-git commit -m "Fix: implement memory_mine.py
-
-Example: \`python3 bin/memory_mine.py --days 7\`
-
-This mines sessions into MemPalace."
-```
-
-### ✅ Safe (Do this instead)
-
-```bash
-# Method 1: Use file
-echo "Fix: implement memory_mine.py
-
-Example: \`python3 bin/memory_mine.py --days 7\`
-
-This mines sessions into MemPalace." > /tmp/commit-msg.txt
-git commit -F /tmp/commit-msg.txt
-
-# Method 2: Use safe commit tool
-python3 bin/safe_commit.py -m "Fix: implement memory_mine.py
-
-Example: \`python3 bin/memory_mine.py --days 7\`
-
-This mines sessions into MemPalace."
-
-# Method 3: Escape manually
-git commit -m "Fix: implement memory_mine.py
-
-Example: \\`python3 bin/memory_mine.py --days 7\\`
-
-This mines sessions into MemPalace."
-```
-
-## What Happened in Issue #1430
-
-During commit for #1124, a commit message contained:
-```
-Example: \`python3 bin/memory_mine.py --days 7\`
-```
-
-The backticks were interpreted by the shell during git commit processing, causing memory_mine.py to execute. While the outcome was positive (26 sessions mined), this is a security risk.
-
-## Prevention
-
-1. **Always use `git commit -F <file>`** for messages containing code
-2. **Install the commit-msg hook** to warn about dangerous patterns
-3. **Use the safe_commit.py tool** for automatic escaping
-4. **Document safe patterns** in team guidelines
-
-## Related Issues
-
- **Issue #1430:** This improvement
- **Issue #1124:** Original issue that triggered the problem
-
-## Files
-
- `bin/safe_commit.py` - Safe commit tool
- `.githooks/commit-msg` - Commit hook (to be installed)
- `docs/safe-commit-practices.md` - This documentation
-
-## Conclusion
-
-Shell injection in commit messages is a real security risk. By using safe commit practices, we can prevent unintended code execution while still allowing code examples in commit messages.
-
-**Remember:** When in doubt, use `git commit -F <file>` instead of `git commit -m`.
--- a/scripts/pr-preflight-check.sh
+++ b/scripts/pr-preflight-check.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+# ═══════════════════════════════════════════════════════════════
+# pr-preflight-check.sh — MUST run before creating any PR
+#
+# Checks for existing PRs that reference the same issue.
+# Refuses to proceed if duplicates exist.
+#
+# Usage:
+#   ./scripts/pr-preflight-check.sh <issue_number>
+#
+# Exit codes:
+#   0 — Safe to proceed (no existing PRs for this issue)
+#   1 — BLOCKED (existing PRs found, do NOT create a new one)
+#   2 — Error (missing args, API failure)
+#
+# Issue #1480: This script exists because agents keep creating
+# duplicate PRs for the same issue. Running this before `git push`
+# or `curl ... /pulls` prevents the problem.
+# ═══════════════════════════════════════════════════════════════
+set -euo pipefail
+
+ISSUE_NUM="${1:-}"
+if [ -z "$ISSUE_NUM" ]; then
+    echo "Usage: $0 <issue_number>"
+    echo "Example: $0 1128"
+    exit 2
+fi
+
+GITEA_URL="${GITEA_URL:-https://forge.alexanderwhitestone.com}"
+GITEA_TOKEN="${GITEA_TOKEN:?Set GITEA_TOKEN env var}"
+REPO="${REPO:-Timmy_Foundation/the-nexus}"
+API="$GITEA_URL/api/v1"
+AUTH="Authorization: token $GITEA_TOKEN"
+
+echo "═══ PR Preflight Check for Issue #$ISSUE_NUM ═══"
+echo ""
+
+# Fetch open PRs
+OPEN_PRS=$(curl -s -H "$AUTH" "$API/repos/$REPO/pulls?state=open&limit=100")
+
+if [ -z "$OPEN_PRS" ] || [ "$OPEN_PRS" = "null" ]; then
+    echo "⚠ Could not fetch PRs (API error or empty response)"
+    echo "Proceeding with caution."
+    exit 0
+fi
+
+# Find PRs referencing this issue
+MATCHES=$(echo "$OPEN_PRS" | jq -r ".[] | select(.title | test(\"#$ISSUE_NUM\"; \"i\") or .body // \"\" | test(\"#$ISSUE_NUM\"; \"i\")) | \"  PR #\\(.number): \\(.title) [\\(.head.ref)] (\\(.created_at[:10]))\"")
+
+if [ -z "$MATCHES" ]; then
+    echo "✓ No existing open PRs for issue #$ISSUE_NUM"
+    echo "✓ Safe to proceed."
+    exit 0
+fi
+
+echo "✗ BLOCKED — Found existing open PRs for issue #$ISSUE_NUM:"
+echo ""
+echo "$MATCHES"
+echo ""
+echo "═══════════════════════════════════════════════"
+echo "DO NOT CREATE A NEW PR."
+echo ""
+echo "Options:"
+echo "  1. Review and merge an existing PR"
+echo "  2. Close duplicates first: ./scripts/cleanup-duplicate-prs.sh --close"
+echo "  3. Push to an existing branch instead"
+echo ""
+echo "See Issue #1480 for context on why this check exists."
+echo "═══════════════════════════════════════════════"
+exit 1
--- a/scripts/pr_preflight_check.py
+++ b/scripts/pr_preflight_check.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+"""
+pr_preflight_check.py — Prevent duplicate PR creation.
+
+Call before creating any PR:
+    python3 scripts/pr_preflight_check.py 1128
+
+Returns exit code 0 if safe, 1 if blocked.
+Designed for agent workflows — agents MUST call this before `curl ... /pulls`.
+
+Issue #1480: The duplicate PR problem.
+"""
+import json
+import os
+import sys
+import urllib.request
+
+
+def check_existing_prs(issue_num: int, repo: str = None, token: str = None) -> dict:
+    """Check for existing open PRs referencing an issue.
+
+    Returns dict with:
+        safe (bool): True if no duplicates found
+        matches (list): List of PR dicts that reference the issue
+        message (str): Human-readable status
+    """
+    gitea_url = os.environ.get("GITEA_URL", "https://forge.alexanderwhitestone.com")
+    token = token or os.environ.get("GITEA_TOKEN", "")
+    repo = repo or os.environ.get("REPO", "Timmy_Foundation/the-nexus")
+
+    if not token:
+        token_path = os.path.expanduser("~/.config/gitea/token")
+        if os.path.exists(token_path):
+            token = open(token_path).read().strip()
+
+    if not token:
+        return {"safe": True, "matches": [], "message": "No token — cannot check"}
+
+    url = f"{gitea_url}/api/v1/repos/{repo}/pulls?state=open&limit=100"
+    req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
+
+    try:
+        with urllib.request.urlopen(req, timeout=10) as resp:
+            prs = json.loads(resp.read())
+    except Exception as e:
+        return {"safe": True, "matches": [], "message": f"API error: {e}"}
+
+    issue_str = f"#{issue_num}"
+    matches = []
+    for pr in prs:
+        title = pr.get("title", "")
+        body = pr.get("body") or ""
+        if issue_str in title or issue_str in body:
+            matches.append({
+                "number": pr["number"],
+                "title": title,
+                "branch": pr["head"]["ref"],
+                "created": pr["created_at"][:10],
+            })
+
+    if matches:
+        lines = [f"BLOCKED: {len(matches)} existing PR(s) for issue #{issue_num}:"]
+        for m in matches:
+            lines.append(f"  PR #{m['number']}: {m['title']} [{m['branch']}] ({m['created']})")
+        lines.append("")
+        lines.append("DO NOT CREATE A NEW PR. Review existing ones first.")
+        return {"safe": False, "matches": matches, "message": "\n".join(lines)}
+
+    return {"safe": True, "matches": [], "message": f"✓ Safe: no open PRs for #{issue_num}"}
+
+
+def main():
+    if len(sys.argv) < 2:
+        print("Usage: pr_preflight_check.py <issue_number> [repo]")
+        print("Example: pr_preflight_check.py 1128")
+        print("         pr_preflight_check.py 1339 Timmy_Foundation/the-nexus")
+        sys.exit(2)
+
+    issue_num = int(sys.argv[1])
+    repo = sys.argv[2] if len(sys.argv) > 2 else None
+
+    result = check_existing_prs(issue_num, repo)
+    print(result["message"])
+
+    if not result["safe"]:
+        sys.exit(1)
+
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()