feat(ci): add contributor attribution check on PRs (#9376)

Adds a CI workflow that blocks PRs introducing commits with
unmapped author emails. Checks each new commit's author email
against AUTHOR_MAP in scripts/release.py — GitHub noreply emails
auto-pass, but personal/work emails must be mapped.

Also adds --strict and --diff-base flags to contributor_audit.py
for programmatic use. --strict exits 1 when new unmapped emails
are found; --diff-base scopes the check to only flag emails from
commits after a given ref (grandfathers existing unknowns).

Prevents a recurrence of the gap found in the April 2026 contributor
audit, which turned up 97 unmapped author emails.
This commit is contained in:
Teknium
2026-04-13 21:13:08 -07:00
committed by GitHub
parent 5719c1f391
commit dd86deef13
2 changed files with 119 additions and 0 deletions

70
.github/workflows/contributor-check.yml vendored Normal file
View File

@@ -0,0 +1,70 @@
name: Contributor Attribution Check

# Fails a pull request when one of its non-merge commits has an author email
# that is neither exempt (see the `case` below) nor present as a quoted string
# in scripts/release.py, which is where AUTHOR_MAP lives.

on:
  pull_request:
    branches: [main]
    paths:
      # Only run when code files change (not docs-only PRs)
      - '*.py'
      - '**/*.py'
      - '.github/workflows/contributor-check.yml'

# The job only reads the repository, so keep the token read-only.
permissions:
  contents: read

jobs:
  check-attribution:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Full history needed for git log

      - name: Check for unmapped contributor emails
        run: |
          # Get the merge base between this PR and main
          MERGE_BASE=$(git merge-base origin/main HEAD)

          # One "email<TAB>name" line per distinct author email in the PR range.
          # The name is taken from the same commits that supplied the email
          # (newest commit wins), instead of a second `git log --author=...`
          # lookup, which treats the email as a regex over all history and can
          # therefore report an unrelated author.
          AUTHORS=$(git log "${MERGE_BASE}..HEAD" --format='%ae%x09%an' --no-merges \
            | awk -F'\t' '!seen[$1]++' \
            | sort)

          if [ -z "$AUTHORS" ]; then
            echo "No new commits to check."
            exit 0
          fi

          # Check each email against AUTHOR_MAP in release.py.
          # MISSING accumulates "email<TAB>name" lines separated by real
          # newlines, so nothing has to be re-parsed out of display text and
          # no `echo -e` escape processing touches author-controlled names.
          MISSING=""
          while IFS=$'\t' read -r email author; do
            if [ -z "$email" ]; then
              continue
            fi

            case "$email" in
              # Skip teknium, bot, and anthropic.com/cursor.com addresses
              *teknium*|*noreply@github.com*|*dependabot*|*github-actions*|*anthropic.com*|*cursor.com*)
                continue ;;
              # GitHub noreply emails (ID+user@users.noreply.github.com) auto-resolve
              *+*@users.noreply.github.com*)
                continue ;;
            esac

            # The email must appear as a double-quoted string in release.py
            if ! grep -qF "\"${email}\"" scripts/release.py 2>/dev/null; then
              MISSING="${MISSING}${email}"$'\t'"${author}"$'\n'
            fi
          done <<< "$AUTHORS"

          if [ -n "$MISSING" ]; then
            echo ""
            echo "⚠️ New contributor email(s) not in AUTHOR_MAP:"
            while IFS=$'\t' read -r email author; do
              if [ -n "$email" ]; then
                printf '  %s (%s)\n' "$email" "$author"
              fi
            done <<< "$MISSING"
            echo ""
            echo "Please add mappings to scripts/release.py AUTHOR_MAP:"
            while IFS=$'\t' read -r email author; do
              if [ -n "$email" ]; then
                printf '  "%s": "<github-username>",\n' "$email"
              fi
            done <<< "$MISSING"
            echo ""
            echo "To find the GitHub username for an email:"
            echo "  gh api 'search/users?q=EMAIL+in:email' --jq '.items[0].login'"
            exit 1
          else
            echo "✅ All contributor emails are mapped in AUTHOR_MAP."
          fi

View File

@@ -333,6 +333,16 @@ def main():
default=None,
help="Path to a release notes file to check for missing contributors",
)
parser.add_argument(
"--strict",
action="store_true",
help="Exit with code 1 if new unmapped emails are found (for CI)",
)
parser.add_argument(
"--diff-base",
default=None,
help="Git ref to diff against (only flag emails from commits after this ref)",
)
args = parser.parse_args()
print(f"=== Contributor Audit: {args.since_tag}..{args.until} ===")
@@ -398,6 +408,42 @@ def main():
for email, name in sorted(all_unknowns.items()):
print(f' "{email}": "{name}",')
# ---- Strict mode: fail CI if new unmapped emails are introduced ----
if args.strict and all_unknowns:
# In strict mode, check if ANY unknown emails come from commits in this
# PR's diff range (new unmapped emails that weren't there before).
# This is the CI gate: existing unknowns are grandfathered, but new
# commits must have their author email in AUTHOR_MAP.
new_unknowns = {}
if args.diff_base:
# Only flag emails from commits after diff_base
new_commits_output = git(
"log", f"{args.diff_base}..HEAD",
"--format=%ae", "--no-merges",
)
new_emails = set(new_commits_output.splitlines()) if new_commits_output else set()
for email, name in all_unknowns.items():
if email in new_emails:
new_unknowns[email] = name
else:
new_unknowns = all_unknowns
if new_unknowns:
print()
print(f"=== STRICT MODE FAILURE: {len(new_unknowns)} new unmapped email(s) ===")
print("Add these to AUTHOR_MAP in scripts/release.py before merging:")
print()
for email, name in sorted(new_unknowns.items()):
print(f' "{email}": "<github-username>",')
print()
print("To find the GitHub username:")
print(" gh api 'search/users?q=EMAIL+in:email' --jq '.items[0].login'")
strict_failed = True
else:
strict_failed = False
else:
strict_failed = False
# ---- Release file comparison ----
if args.release_file:
print()
@@ -419,6 +465,9 @@ def main():
print()
print("Done.")
if strict_failed:
sys.exit(1)
if __name__ == "__main__":
main()