feat(ci): add contributor attribution check on PRs (#9376)
Adds a CI workflow that blocks PRs introducing commits with unmapped author emails. The workflow checks each new commit's author email against AUTHOR_MAP in scripts/release.py — GitHub noreply emails auto-pass, but personal/work emails must be mapped. Also adds --strict and --diff-base flags to contributor_audit.py for programmatic use: --strict exits with code 1 when new unmapped emails are found, and --diff-base scopes the check so that only emails from commits after a given ref are flagged (existing unknowns are grandfathered). This prevents a recurrence of the 97-unmapped-email gap found in the April 2026 contributor audit.
This commit is contained in:
70
.github/workflows/contributor-check.yml
vendored
Normal file
70
.github/workflows/contributor-check.yml
vendored
Normal file
@@ -0,0 +1,70 @@
|
||||
name: Contributor Attribution Check

# Fails a pull request when any of its non-merge commits has an author email
# that is neither skipped (maintainer/bot patterns, GitHub "+"-style noreply
# addresses) nor present as a quoted string in scripts/release.py (AUTHOR_MAP).
on:
  pull_request:
    branches: [main]
    paths:
      # Only run when code files change (not docs-only PRs)
      - '*.py'
      - '**/*.py'
      - '.github/workflows/contributor-check.yml'

jobs:
  check-attribution:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Full history needed for git log

      - name: Check for unmapped contributor emails
        run: |
          # Get the merge base between this PR and main
          MERGE_BASE=$(git merge-base origin/main HEAD)
          RANGE="${MERGE_BASE}..HEAD"

          # Find any new author emails in this PR's commits
          NEW_EMAILS=$(git log "$RANGE" --format='%ae' --no-merges | sort -u)

          if [ -z "$NEW_EMAILS" ]; then
            echo "No new commits to check."
            exit 0
          fi

          # Check each email against AUTHOR_MAP in release.py.
          # Results go into parallel arrays (not a "\n"-joined string printed
          # with `echo -e`) so backslashes in commit metadata are never
          # interpreted as escape sequences.
          MISSING_EMAILS=()
          MISSING_AUTHORS=()
          while IFS= read -r email; do
            # Skip teknium and bot emails
            case "$email" in
              *teknium*|*noreply@github.com*|*dependabot*|*github-actions*|*anthropic.com*|*cursor.com*)
                continue ;;
            esac

            # GitHub noreply emails (ID+username form) auto-resolve.
            # printf instead of echo: the email is untrusted input.
            if printf '%s\n' "$email" | grep -qP '\+.*@users\.noreply\.github\.com'; then
              continue
            fi

            if ! grep -qF "\"${email}\"" scripts/release.py 2>/dev/null; then
              # Look the name up by exact email match within this PR's commits.
              # `git log --author=<email>` would treat the email as a regex over
              # the whole history and could report a different contributor.
              # The email is passed via the environment because `awk -v` would
              # process backslash escapes in it.
              AUTHOR=$(git log "$RANGE" --no-merges --format='%ae%x09%an' \
                | ADDR="$email" awk -F'\t' '$1 == ENVIRON["ADDR"] { print substr($0, length($1) + 2); exit }')
              MISSING_EMAILS+=("$email")
              MISSING_AUTHORS+=("$AUTHOR")
            fi
          done <<< "$NEW_EMAILS"

          if [ "${#MISSING_EMAILS[@]}" -gt 0 ]; then
            echo ""
            echo "⚠️ New contributor email(s) not in AUTHOR_MAP:"
            echo ""
            for i in "${!MISSING_EMAILS[@]}"; do
              printf ' %s (%s)\n' "${MISSING_EMAILS[$i]}" "${MISSING_AUTHORS[$i]}"
            done
            echo ""
            echo "Please add mappings to scripts/release.py AUTHOR_MAP:"
            for email in "${MISSING_EMAILS[@]}"; do
              printf ' "%s": "<github-username>",\n' "$email"
            done
            echo ""
            echo "To find the GitHub username for an email:"
            echo " gh api 'search/users?q=EMAIL+in:email' --jq '.items[0].login'"
            exit 1
          else
            echo "✅ All contributor emails are mapped in AUTHOR_MAP."
          fi
|
||||
@@ -333,6 +333,16 @@ def main():
|
||||
default=None,
|
||||
help="Path to a release notes file to check for missing contributors",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--strict",
|
||||
action="store_true",
|
||||
help="Exit with code 1 if new unmapped emails are found (for CI)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--diff-base",
|
||||
default=None,
|
||||
help="Git ref to diff against (only flag emails from commits after this ref)",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
print(f"=== Contributor Audit: {args.since_tag}..{args.until} ===")
|
||||
@@ -398,6 +408,42 @@ def main():
|
||||
for email, name in sorted(all_unknowns.items()):
|
||||
print(f' "{email}": "{name}",')
|
||||
|
||||
# ---- Strict mode: fail CI if new unmapped emails are introduced ----
|
||||
if args.strict and all_unknowns:
|
||||
# In strict mode, check if ANY unknown emails come from commits in this
|
||||
# PR's diff range (new unmapped emails that weren't there before).
|
||||
# This is the CI gate: existing unknowns are grandfathered, but new
|
||||
# commits must have their author email in AUTHOR_MAP.
|
||||
new_unknowns = {}
|
||||
if args.diff_base:
|
||||
# Only flag emails from commits after diff_base
|
||||
new_commits_output = git(
|
||||
"log", f"{args.diff_base}..HEAD",
|
||||
"--format=%ae", "--no-merges",
|
||||
)
|
||||
new_emails = set(new_commits_output.splitlines()) if new_commits_output else set()
|
||||
for email, name in all_unknowns.items():
|
||||
if email in new_emails:
|
||||
new_unknowns[email] = name
|
||||
else:
|
||||
new_unknowns = all_unknowns
|
||||
|
||||
if new_unknowns:
|
||||
print()
|
||||
print(f"=== STRICT MODE FAILURE: {len(new_unknowns)} new unmapped email(s) ===")
|
||||
print("Add these to AUTHOR_MAP in scripts/release.py before merging:")
|
||||
print()
|
||||
for email, name in sorted(new_unknowns.items()):
|
||||
print(f' "{email}": "<github-username>",')
|
||||
print()
|
||||
print("To find the GitHub username:")
|
||||
print(" gh api 'search/users?q=EMAIL+in:email' --jq '.items[0].login'")
|
||||
strict_failed = True
|
||||
else:
|
||||
strict_failed = False
|
||||
else:
|
||||
strict_failed = False
|
||||
|
||||
# ---- Release file comparison ----
|
||||
if args.release_file:
|
||||
print()
|
||||
@@ -419,6 +465,9 @@ def main():
|
||||
print()
|
||||
print("Done.")
|
||||
|
||||
if strict_failed:
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user