Compare commits

..

1 Commits

Author SHA1 Message Date
Timmy
25b26c536d fix(#1480): Add duplicate PR prevention mechanism
Some checks failed
CI / test (pull_request) Failing after 51s
CI / validate (pull_request) Failing after 40s
Review Approval Gate / verify-review (pull_request) Failing after 6s
Agents keep creating duplicate PRs for the same issue (#1128
had 4+ duplicate PRs). This adds a mandatory preflight check.

Files:
  - scripts/pr-preflight-check.sh: bash preflight (exit 1 if duplicates)
  - scripts/pr_preflight_check.py: Python preflight (for agent workflows)
  - docs/DUPLICATE_PR_PREVENTION.md: usage documentation

Closes #1480, #1460, #1474
2026-04-14 18:59:02 -04:00
6 changed files with 212 additions and 515 deletions

View File

@@ -1,49 +0,0 @@
#!/usr/bin/env bash
# Commit-msg hook: warn about shell injection risks
# Install: cp .githooks/commit-msg .git/hooks/commit-msg && chmod +x .git/hooks/commit-msg
COMMIT_MSG_FILE="$1"
COMMIT_MSG=$(cat "$COMMIT_MSG_FILE")
# Check for dangerous patterns
DANGEROUS_PATTERNS=(
'`' # Backticks
'$(' # Command substitution
'${' # Variable expansion
'\\`' # Escaped backticks
'eval ' # eval command
'exec ' # exec command
'source ' # source command
'|' # Pipe
'&&' # AND operator
'||' # OR operator
';' # Semicolon
'>' # Redirect
'<' # Input redirect
)
FOUND_ISSUES=()
for pattern in "${DANGEROUS_PATTERNS[@]}"; do
if echo "$COMMIT_MSG" | grep -q "$pattern"; then
FOUND_ISSUES+=("$pattern")
fi
done
if [ ${#FOUND_ISSUES[@]} -gt 0 ]; then
echo "⚠️ WARNING: Commit message contains potentially dangerous patterns:"
for issue in "${FOUND_ISSUES[@]}"; do
echo " - $issue"
done
echo ""
echo "This could trigger shell execution during git operations."
echo ""
echo "Safe alternatives:"
echo " 1. Use: git commit -F <file> instead of git commit -m"
echo " 2. Escape special characters in commit messages"
echo " 3. Use the safe_commit() function from bin/safe_commit.py"
echo ""
echo "To proceed anyway, use: git commit --no-verify"
exit 1
fi
exit 0

View File

@@ -1,307 +0,0 @@
#!/usr/bin/env python3
"""
Safe commit message handler to prevent shell injection.
Issue #1430: [IMPROVEMENT] memory_mine.py ran during git commit — shell injection from commit message
This script provides safe ways to commit with code-containing messages.
"""
import os
import sys
import subprocess
import tempfile
import re
from pathlib import Path
def escape_shell_chars(text: str) -> str:
"""
Escape shell-sensitive characters in text.
This prevents shell injection when text is used in shell commands.
"""
# Characters that need escaping in shell
shell_chars = ['$', '`', '\\', '"', "'", '!', '(', ')', '{', '}', '[', ']',
'|', '&', ';', '<', '>', '*', '?', '~', '#']
escaped = text
for char in shell_chars:
escaped = escaped.replace(char, '\\' + char)
return escaped
def safe_commit_message(message: str) -> str:
"""
Create a safe commit message by escaping shell-sensitive characters.
Args:
message: The commit message
Returns:
Escaped commit message safe for shell use
"""
return escape_shell_chars(message)
def commit_with_file(message: str, branch: str = None) -> bool:
"""
Commit using a temporary file instead of -m flag.
This is the safest way to commit messages containing code or special characters.
Args:
message: The commit message
branch: Optional branch name
Returns:
True if successful, False otherwise
"""
# Create temporary file for commit message
with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:
f.write(message)
temp_file = f.name
try:
# Build git command
cmd = ['git', 'commit', '-F', temp_file]
if branch:
cmd.extend(['-b', branch])
# Execute git commit
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode == 0:
print(f"✅ Committed successfully using file: {temp_file}")
return True
else:
print(f"❌ Commit failed: {result.stderr}")
return False
finally:
# Clean up temporary file
try:
os.unlink(temp_file)
except:
pass
def commit_safe(message: str, use_file: bool = True) -> bool:
"""
Safely commit with a message.
Args:
message: The commit message
use_file: If True, use -F <file> instead of -m
Returns:
True if successful, False otherwise
"""
if use_file:
return commit_with_file(message)
else:
# Use escaped message with -m flag
escaped_message = safe_commit_message(message)
cmd = ['git', 'commit', '-m', escaped_message]
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode == 0:
print("✅ Committed successfully with escaped message")
return True
else:
print(f"❌ Commit failed: {result.stderr}")
return False
def check_commit_message_safety(message: str) -> dict:
"""
Check if a commit message contains potentially dangerous patterns.
Args:
message: The commit message to check
Returns:
Dictionary with safety analysis
"""
dangerous_patterns = [
(r'`[^`]*`', 'Backticks (shell command substitution)'),
(r'\$\([^)]*\)', 'Command substitution $(...)'),
(r'\$\{[^}]*\}', 'Variable expansion ${...}'),
(r'\\`', 'Escaped backticks'),
(r'eval\s+', 'eval command'),
(r'exec\s+', 'exec command'),
(r'source\s+', 'source command'),
(r'\.\s+', 'dot command'),
(r'\|\s*', 'Pipe character'),
(r'&&', 'AND operator'),
(r'\|\|', 'OR operator'),
(r';', 'Semicolon (command separator)'),
(r'>', 'Redirect operator'),
(r'<', 'Input redirect'),
]
findings = []
for pattern, description in dangerous_patterns:
matches = re.findall(pattern, message)
if matches:
findings.append({
'pattern': pattern,
'description': description,
'matches': matches,
'count': len(matches)
})
return {
'safe': len(findings) == 0,
'findings': findings,
'recommendation': 'Use commit_with_file() or escape_shell_chars()' if findings else 'Message appears safe'
}
def create_commit_hook_guard():
"""
Create a commit-msg hook that warns about dangerous patterns.
"""
hook_content = '''#!/usr/bin/env bash
# Commit-msg hook: warn about shell injection risks
# Install: cp .githooks/commit-msg .git/hooks/commit-msg && chmod +x .git/hooks/commit-msg
COMMIT_MSG_FILE="$1"
COMMIT_MSG=$(cat "$COMMIT_MSG_FILE")
# Check for dangerous patterns
DANGEROUS_PATTERNS=(
'`' # Backticks
'$(' # Command substitution
'${' # Variable expansion
'\\`' # Escaped backticks
'eval ' # eval command
'exec ' # exec command
'source ' # source command
'|' # Pipe
'&&' # AND operator
'||' # OR operator
';' # Semicolon
'>' # Redirect
'<' # Input redirect
)
FOUND_ISSUES=()
for pattern in "${DANGEROUS_PATTERNS[@]}"; do
if echo "$COMMIT_MSG" | grep -q "$pattern"; then
FOUND_ISSUES+=("$pattern")
fi
done
if [ ${#FOUND_ISSUES[@]} -gt 0 ]; then
echo "⚠️ WARNING: Commit message contains potentially dangerous patterns:"
for issue in "${FOUND_ISSUES[@]}"; do
echo " - $issue"
done
echo ""
echo "This could trigger shell execution during git operations."
echo ""
echo "Safe alternatives:"
echo " 1. Use: git commit -F <file> instead of git commit -m"
echo " 2. Escape special characters in commit messages"
echo " 3. Use the safe_commit() function from bin/safe_commit.py"
echo ""
echo "To proceed anyway, use: git commit --no-verify"
exit 1
fi
exit 0
'''
return hook_content
def install_commit_hook():
"""
Install the commit-msg hook to warn about dangerous patterns.
"""
hook_path = Path('.git/hooks/commit-msg')
hook_content = create_commit_hook_guard()
# Check if .git/hooks exists
if not hook_path.parent.exists():
print("❌ .git/hooks directory not found")
return False
# Write hook
with open(hook_path, 'w') as f:
f.write(hook_content)
# Make executable
os.chmod(hook_path, 0o755)
print(f"✅ Installed commit-msg hook to {hook_path}")
return True
def main():
"""Main entry point for safe commit tool."""
import argparse
parser = argparse.ArgumentParser(description="Safe commit message handling")
parser.add_argument("--message", "-m", help="Commit message")
parser.add_argument("--file", "-F", help="Read commit message from file")
parser.add_argument("--check", action="store_true", help="Check message safety")
parser.add_argument("--install-hook", action="store_true", help="Install commit-msg hook")
parser.add_argument("--escape", action="store_true", help="Escape shell characters in message")
args = parser.parse_args()
if args.install_hook:
if install_commit_hook():
print("Commit hook installed successfully")
else:
print("Failed to install commit hook")
sys.exit(1)
return
if args.check:
if args.message:
safety = check_commit_message_safety(args.message)
print(f"Message safety check:")
print(f" Safe: {safety['safe']}")
print(f" Recommendation: {safety['recommendation']}")
if safety['findings']:
print(f" Findings:")
for finding in safety['findings']:
print(f" - {finding['description']}: {finding['count']} matches")
else:
print("Please provide a message with --message")
return
if args.escape:
if args.message:
escaped = safe_commit_message(args.message)
print(f"Escaped message:")
print(escaped)
else:
print("Please provide a message with --message")
return
if args.file:
# Read message from file
with open(args.file, 'r') as f:
message = f.read()
commit_with_file(message)
elif args.message:
# Check if message has dangerous patterns
safety = check_commit_message_safety(args.message)
if safety['safe']:
commit_safe(args.message, use_file=False)
else:
print("⚠️ Message contains potentially dangerous patterns")
print("Using file-based commit for safety...")
commit_safe(args.message, use_file=True)
else:
parser.print_help()
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,50 @@
# Duplicate PR Prevention
## The Problem
Issue #1128 documented a cleanup of duplicate PRs. Agents then created
4+ duplicate PRs *for issue #1128 itself*. The irony was not lost on anyone.
See: #1449, #1460, #1474, #1480.
## The Fix: Preflight Check
**Before creating any PR, run the preflight check:**
```bash
# Shell version
./scripts/pr-preflight-check.sh <issue_number>
# Python version
python3 scripts/pr_preflight_check.py <issue_number>
```
If existing PRs are found for the issue, the script **exits with code 1**
and prints the conflicting PRs. DO NOT proceed to create a new PR.
## Agent Workflow
```
1. Read issue
2. Clone repo
3. Implement fix
4. Commit
5. >>> RUN pr_preflight_check.py <issue_number> <<<
6. If exit 0: safe to push and create PR
7. If exit 1: STOP — review existing PRs first
8. Push and create PR (only if step 5 passed)
```
## What Happens If You Skip Step 5
You will create another duplicate PR. The cleanup script will find it.
Someone will close it. You will have wasted compute and created noise.
## Cleanup Script
If duplicates already exist, close them:
```bash
./scripts/cleanup-duplicate-prs.sh --dry-run # preview
./scripts/cleanup-duplicate-prs.sh --close # actually close
```

View File

@@ -1,159 +0,0 @@
# Safe Commit Practices
**Issue:** #1430 - [IMPROVEMENT] memory_mine.py ran during git commit — shell injection from commit message
## Problem
During commit for #1124, the commit message contained Python code examples that triggered shell execution of memory_mine.py. The backtick-wrapped code in the commit message was interpreted by the shell during git commit processing.
This is a potential vector for unintended code execution.
## Safe Commit Methods
### 1. Use `git commit -F <file>` (Recommended)
The safest way to commit messages containing code or special characters:
```bash
# Create a file with your commit message
echo "Fix: implement memory_mine.py with backtick example
Example: \`python3 bin/memory_mine.py --days 7\`
This commit adds memory mining functionality." > /tmp/commit-msg.txt
# Commit using the file
git commit -F /tmp/commit-msg.txt
```
### 2. Use the Safe Commit Tool
```bash
# Safe commit with automatic escaping
python3 bin/safe_commit.py -m "Fix: implement memory_mine.py with backtick example"
# Safe commit using file
python3 bin/safe_commit.py -F /tmp/commit-msg.txt
# Check if a message is safe
python3 bin/safe_commit.py --check -m "Example: \`python3 bin/memory_mine.py\`"
```
### 3. Escape Shell Characters Manually
If you must use `git commit -m`, escape special characters:
```bash
# Escape backticks and other shell characters
git commit -m "Fix: implement memory_mine.py with backtick example
Example: \\`python3 bin/memory_mine.py --days 7\\`
This commit adds memory mining functionality."
```
## Dangerous Patterns to Avoid
The following patterns in commit messages can trigger shell execution:
- **Backticks**: `` `command` `` → Executes command
- **Command substitution**: `$(command)` → Executes command
- **Variable expansion**: `${variable}` → Expands variable
- **Pipes**: `command1 | command2` → Pipes output
- **Operators**: `&&`, `||`, `;` → Command chaining
- **Redirects**: `>`, `<` → File operations
## Installation
### Install the Commit Hook
To automatically warn about dangerous patterns:
```bash
# Install the commit-msg hook
python3 bin/safe_commit.py --install-hook
# Or manually
cp .githooks/commit-msg .git/hooks/commit-msg
chmod +x .git/hooks/commit-msg
```
### Configure Git Hooks Path
If using the `.githooks` directory:
```bash
git config core.hooksPath .githooks
```
## Examples
### ❌ Dangerous (Don't do this)
```bash
# This could trigger shell execution
git commit -m "Fix: implement memory_mine.py
Example: \`python3 bin/memory_mine.py --days 7\`
This mines sessions into MemPalace."
```
### ✅ Safe (Do this instead)
```bash
# Method 1: Use file
echo "Fix: implement memory_mine.py
Example: \`python3 bin/memory_mine.py --days 7\`
This mines sessions into MemPalace." > /tmp/commit-msg.txt
git commit -F /tmp/commit-msg.txt
# Method 2: Use safe commit tool
python3 bin/safe_commit.py -m "Fix: implement memory_mine.py
Example: \`python3 bin/memory_mine.py --days 7\`
This mines sessions into MemPalace."
# Method 3: Escape manually
git commit -m "Fix: implement memory_mine.py
Example: \\`python3 bin/memory_mine.py --days 7\\`
This mines sessions into MemPalace."
```
## What Happened in Issue #1430
During commit for #1124, a commit message contained:
```
Example: \`python3 bin/memory_mine.py --days 7\`
```
The backticks were interpreted by the shell during git commit processing, causing memory_mine.py to execute. While the outcome was positive (26 sessions mined), this is a security risk.
## Prevention
1. **Always use `git commit -F <file>`** for messages containing code
2. **Install the commit-msg hook** to warn about dangerous patterns
3. **Use the safe_commit.py tool** for automatic escaping
4. **Document safe patterns** in team guidelines
## Related Issues
- **Issue #1430:** This improvement
- **Issue #1124:** Original issue that triggered the problem
## Files
- `bin/safe_commit.py` - Safe commit tool
- `.githooks/commit-msg` - Commit hook (to be installed)
- `docs/safe-commit-practices.md` - This documentation
## Conclusion
Shell injection in commit messages is a real security risk. By using safe commit practices, we can prevent unintended code execution while still allowing code examples in commit messages.
**Remember:** When in doubt, use `git commit -F <file>` instead of `git commit -m`.

70
scripts/pr-preflight-check.sh Executable file
View File

@@ -0,0 +1,70 @@
#!/usr/bin/env bash
# ═══════════════════════════════════════════════════════════════
# pr-preflight-check.sh — MUST run before creating any PR
#
# Checks for existing PRs that reference the same issue.
# Refuses to proceed if duplicates exist.
#
# Usage:
# ./scripts/pr-preflight-check.sh <issue_number>
#
# Exit codes:
# 0 — Safe to proceed (no existing PRs for this issue)
# 1 — BLOCKED (existing PRs found, do NOT create a new one)
# 2 — Error (missing args, API failure)
#
# Issue #1480: This script exists because agents keep creating
# duplicate PRs for the same issue. Running this before `git push`
# or `curl ... /pulls` prevents the problem.
# ═══════════════════════════════════════════════════════════════
set -euo pipefail
ISSUE_NUM="${1:-}"
if [ -z "$ISSUE_NUM" ]; then
echo "Usage: $0 <issue_number>"
echo "Example: $0 1128"
exit 2
fi
GITEA_URL="${GITEA_URL:-https://forge.alexanderwhitestone.com}"
GITEA_TOKEN="${GITEA_TOKEN:?Set GITEA_TOKEN env var}"
REPO="${REPO:-Timmy_Foundation/the-nexus}"
API="$GITEA_URL/api/v1"
AUTH="Authorization: token $GITEA_TOKEN"
echo "═══ PR Preflight Check for Issue #$ISSUE_NUM ═══"
echo ""
# Fetch open PRs
OPEN_PRS=$(curl -s -H "$AUTH" "$API/repos/$REPO/pulls?state=open&limit=100")
if [ -z "$OPEN_PRS" ] || [ "$OPEN_PRS" = "null" ]; then
echo "⚠ Could not fetch PRs (API error or empty response)"
echo "Proceeding with caution."
exit 0
fi
# Find PRs referencing this issue
MATCHES=$(echo "$OPEN_PRS" | jq -r ".[] | select(.title | test(\"#$ISSUE_NUM\"; \"i\") or .body // \"\" | test(\"#$ISSUE_NUM\"; \"i\")) | \" PR #\\(.number): \\(.title) [\\(.head.ref)] (\\(.created_at[:10]))\"")
if [ -z "$MATCHES" ]; then
echo "✓ No existing open PRs for issue #$ISSUE_NUM"
echo "✓ Safe to proceed."
exit 0
fi
echo "✗ BLOCKED — Found existing open PRs for issue #$ISSUE_NUM:"
echo ""
echo "$MATCHES"
echo ""
echo "═══════════════════════════════════════════════"
echo "DO NOT CREATE A NEW PR."
echo ""
echo "Options:"
echo " 1. Review and merge an existing PR"
echo " 2. Close duplicates first: ./scripts/cleanup-duplicate-prs.sh --close"
echo " 3. Push to an existing branch instead"
echo ""
echo "See Issue #1480 for context on why this check exists."
echo "═══════════════════════════════════════════════"
exit 1

View File

@@ -0,0 +1,92 @@
#!/usr/bin/env python3
"""
pr_preflight_check.py — Prevent duplicate PR creation.
Call before creating any PR:
python3 scripts/pr_preflight_check.py 1128
Returns exit code 0 if safe, 1 if blocked.
Designed for agent workflows — agents MUST call this before `curl ... /pulls`.
Issue #1480: The duplicate PR problem.
"""
import json
import os
import sys
import urllib.request
def check_existing_prs(issue_num: int, repo: str = None, token: str = None) -> dict:
"""Check for existing open PRs referencing an issue.
Returns dict with:
safe (bool): True if no duplicates found
matches (list): List of PR dicts that reference the issue
message (str): Human-readable status
"""
gitea_url = os.environ.get("GITEA_URL", "https://forge.alexanderwhitestone.com")
token = token or os.environ.get("GITEA_TOKEN", "")
repo = repo or os.environ.get("REPO", "Timmy_Foundation/the-nexus")
if not token:
token_path = os.path.expanduser("~/.config/gitea/token")
if os.path.exists(token_path):
token = open(token_path).read().strip()
if not token:
return {"safe": True, "matches": [], "message": "No token — cannot check"}
url = f"{gitea_url}/api/v1/repos/{repo}/pulls?state=open&limit=100"
req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
try:
with urllib.request.urlopen(req, timeout=10) as resp:
prs = json.loads(resp.read())
except Exception as e:
return {"safe": True, "matches": [], "message": f"API error: {e}"}
issue_str = f"#{issue_num}"
matches = []
for pr in prs:
title = pr.get("title", "")
body = pr.get("body") or ""
if issue_str in title or issue_str in body:
matches.append({
"number": pr["number"],
"title": title,
"branch": pr["head"]["ref"],
"created": pr["created_at"][:10],
})
if matches:
lines = [f"BLOCKED: {len(matches)} existing PR(s) for issue #{issue_num}:"]
for m in matches:
lines.append(f" PR #{m['number']}: {m['title']} [{m['branch']}] ({m['created']})")
lines.append("")
lines.append("DO NOT CREATE A NEW PR. Review existing ones first.")
return {"safe": False, "matches": matches, "message": "\n".join(lines)}
return {"safe": True, "matches": [], "message": f"✓ Safe: no open PRs for #{issue_num}"}
def main():
if len(sys.argv) < 2:
print("Usage: pr_preflight_check.py <issue_number> [repo]")
print("Example: pr_preflight_check.py 1128")
print(" pr_preflight_check.py 1339 Timmy_Foundation/the-nexus")
sys.exit(2)
issue_num = int(sys.argv[1])
repo = sys.argv[2] if len(sys.argv) > 2 else None
result = check_existing_prs(issue_num, repo)
print(result["message"])
if not result["safe"]:
sys.exit(1)
sys.exit(0)
if __name__ == "__main__":
main()