diff --git a/.github/workflows/nestbuildmatrix.yml b/.github/workflows/nestbuildmatrix.yml
index 329f5baa29..dd1d1664ec 100644
--- a/.github/workflows/nestbuildmatrix.yml
+++ b/.github/workflows/nestbuildmatrix.yml
@@ -473,6 +473,134 @@ jobs:
         run: |
           flake8 .
 
+  pr-authors:
+    runs-on: "ubuntu-22.04"
+    if: github.event_name == 'pull_request'
+    steps:
+      - name: Harden Runner
+        uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0
+        with:
+          egress-policy: audit
+          disable-telemetry: true
+
+      - name: "Checkout repository content"
+        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        with:
+          fetch-depth: 0
+
+      - name: "Set up Python 3.x"
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        with:
+          python-version: "3.10"
+
+      - name: "Install dependencies"
+        run: |
+          pip install requests
+
+      - name: "Check PR authors"
+        id: check_authors
+        env:
+          # Expression values reach the script through the environment only, so
+          # bash never re-parses them as part of the script text.
+          GITHUB_TOKEN: ${{ github.token }}
+          PRIVATE_REPO_TOKEN: ${{ secrets.PRIVATE_REPO_TOKEN }}
+          PRIVATE_REPO_OWNER: ${{ secrets.PRIVATE_REPO_OWNER }}
+          PRIVATE_REPO_NAME: ${{ secrets.PRIVATE_REPO_NAME }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          REPO_OWNER: ${{ github.repository_owner }}
+          REPO_NAME: ${{ github.event.repository.name }}
+          AUTHORS_FILE_PATH: ${{ vars.VALIDATED_AUTHORS_FILE_PATH || 'data/gitlognames.yaml' }}
+          # NOTE(review): --fail-on-unknown has only ever been passed when the
+          # repository variable is exactly 'true'; that behavior is kept here.
+          FAIL_ON_UNKNOWN_AUTHORS: ${{ vars.FAIL_ON_UNKNOWN_AUTHORS || 'false' }}
+        run: |
+          # Run the script and capture outputs
+          echo "Running PR authors check script..."
+          extra_args=()
+          if [ "$FAIL_ON_UNKNOWN_AUTHORS" = "true" ]; then
+            extra_args+=(--fail-on-unknown)
+          fi
+          python build_support/check_pr_authors.py \
+            --pr-number "$PR_NUMBER" \
+            --repo-owner "$REPO_OWNER" \
+            --repo-name "$REPO_NAME" \
+            --github-token "$GITHUB_TOKEN" \
+            --private-repo-owner "$PRIVATE_REPO_OWNER" \
+            --private-repo-name "$PRIVATE_REPO_NAME" \
+            --private-repo-token "$PRIVATE_REPO_TOKEN" \
+            --authors-file-path "$AUTHORS_FILE_PATH" \
+            "${extra_args[@]}" 2>&1 | tee /tmp/script_output.txt
+
+          # Check if script failed
+          SCRIPT_EXIT_CODE=${PIPESTATUS[0]}
+          if [ "$SCRIPT_EXIT_CODE" -ne 0 ]; then
+            echo "Script failed with exit code $SCRIPT_EXIT_CODE"
+            echo "Script output:"
+            cat /tmp/script_output.txt
+            exit "$SCRIPT_EXIT_CODE"
+          fi
+
+          # Extract outputs for GitHub Actions
+          if [ -f /tmp/script_output.txt ]; then
+            # Forward only the scalar outputs the script emits. Author names are
+            # attacker-controlled and echoed at the start of a line, so a generic
+            # key=value match would let a crafted name forge step outputs.
+            grep -E '^(author_count|unknown_count|validation_status)=[A-Za-z0-9_]+$' \
+              /tmp/script_output.txt >> "$GITHUB_OUTPUT" || true
+
+            # Handle formatted authors list for GitHub Actions step summary
+            if grep -q '^authors_formatted<<EOF$' /tmp/script_output.txt; then
+              sed -n '/^authors_formatted<<EOF$/,/^EOF$/p' /tmp/script_output.txt \
+                | sed '1d;$d' > /tmp/authors_formatted.txt
+              echo "authors_formatted<<EOF" >> "$GITHUB_OUTPUT"
+              cat /tmp/authors_formatted.txt >> "$GITHUB_OUTPUT"
+              echo "EOF" >> "$GITHUB_OUTPUT"
+            fi
+          fi
+
+      - name: "Display PR authors summary"
+        env:
+          # Author names come from PR commits; expanding them as shell variables
+          # (not as text pasted into the script) prevents command injection.
+          AUTHOR_COUNT: ${{ steps.check_authors.outputs.author_count }}
+          AUTHORS_FORMATTED: ${{ steps.check_authors.outputs.authors_formatted }}
+          VALIDATION_STATUS: ${{ steps.check_authors.outputs.validation_status }}
+          UNKNOWN_COUNT: ${{ steps.check_authors.outputs.unknown_count }}
+        run: |
+          render_summary() {
+            echo "## PR Authors Summary"
+            echo ""
+            echo "**Total unique authors:** $AUTHOR_COUNT"
+            echo ""
+            echo "**Authors:**"
+            echo '```'
+            printf '%s\n' "$AUTHORS_FORMATTED"
+            echo '```'
+
+            # Add validation check results if available
+            if [ "$VALIDATION_STATUS" = "success" ]; then
+              echo ""
+              echo "## Author Validation Check"
+              echo ""
+              echo "✅ **SUCCESS: All PR authors are validated**"
+            elif [ "$VALIDATION_STATUS" = "failure" ]; then
+              echo ""
+              echo "## Author Validation Check"
+              echo ""
+              echo "❌ **FAILURE:** $UNKNOWN_COUNT unknown author(s) found"
+              echo ""
+              echo "The authors of this PR may be contributing for the first time or may have modified their author information. Author information requires review."
+            else
+              echo ""
+              echo "ℹ️ **Note:** Author validation check skipped (private repository access not configured)"
+            fi
+          }
+
+          # Write the summary once: to the step summary and to the build log
+          echo "=== PR AUTHORS SUMMARY ==="
+          render_summary | tee -a "$GITHUB_STEP_SUMMARY"
+          echo "========================="
+
   sphinx-rtd:
     # as close as possible to the Readthedocs setup (system install cmake, pip install -r doc/requirements.txt)
     runs-on: "ubuntu-22.04"
diff --git a/build_support/check_pr_authors.py b/build_support/check_pr_authors.py
new file mode 100755
index 0000000000..65a6e0d5a6
--- /dev/null
+++ b/build_support/check_pr_authors.py
@@ -0,0 +1,244 @@
+# -*- coding: utf-8 -*-
+#
+# check_pr_authors.py
+#
+# This file is part of NEST.
+#
+# Copyright (C) 2004 The NEST Initiative
+#
+# NEST is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 2 of the License, or
+# (at your option) any later version.
+#
+# NEST is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with NEST.  If not, see <http://www.gnu.org/licenses/>.
+#!/usr/bin/env python3
+"""
+Check PR authors against validated author list.
+
+This script fetches PR commits, extracts author information, and compares
+against a validated author list from the NEST release dataset.
# SECURITY NOTE: This script is designed to protect sensitive data:
# - Private repository information is not logged
# - Validated author names from private repo are not exposed in logs
# - PR author names can be logged (they're already public in the PR)
# - References to data from the private repository are dropped after use
#   (best effort: Python strings are immutable and cannot be wiped in place)

import argparse
import json
import os
import sys
import tempfile
from typing import Any, List, Optional, Tuple

try:
    import requests
except ImportError:  # keep the pure helpers importable without the HTTP client
    requests = None

# Seconds to wait for the GitHub API before giving up instead of hanging the job.
REQUEST_TIMEOUT = 30
# Largest page size accepted by the GitHub REST API.
COMMITS_PER_PAGE = 100


def _require_requests() -> None:
    """Exit with a clear message when the ``requests`` package is unavailable."""
    if requests is None:
        print("Error: the 'requests' package is required (pip install requests)")
        sys.exit(1)


def secure_cleanup(data: Any) -> None:
    """Drop references to sensitive data held in mutable containers.

    Lists and dicts are emptied in place (recursively), so the caller's
    container no longer keeps the sensitive items alive. Strings and other
    immutable values cannot be overwritten in Python and are left to the
    garbage collector.

    Args:
        data: Any value; only ``list`` and ``dict`` instances are modified.
    """
    if isinstance(data, dict):
        for value in data.values():
            secure_cleanup(value)
        data.clear()
    elif isinstance(data, list):
        for item in data:
            secure_cleanup(item)
        data.clear()


def get_pr_commits(pr_number: int, repo_owner: str, repo_name: str, token: str) -> List[dict]:
    """Fetch all commits of a PR using the GitHub API.

    The endpoint is paginated (30 commits per page by default), so pages are
    requested until a short page is returned.

    Args:
        pr_number: Pull request number.
        repo_owner: Owner of the repository containing the PR.
        repo_name: Name of the repository containing the PR.
        token: GitHub token sent as a bearer token.

    Returns:
        The commit objects returned by the API, in API order.

    Exits the process with status 1 if a request fails or the response is not
    a JSON array.
    """
    _require_requests()
    url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/pulls/{pr_number}/commits"
    headers = {"Authorization": f"Bearer {token}", "Accept": "application/vnd.github.v3+json"}

    print(f"Fetching commits from GitHub API for PR #{pr_number} in {repo_owner}/{repo_name}")

    commits: List[dict] = []
    page = 1
    try:
        while True:
            response = requests.get(
                url,
                headers=headers,
                params={"per_page": COMMITS_PER_PAGE, "page": page},
                timeout=REQUEST_TIMEOUT,
            )
            response.raise_for_status()

            batch = response.json()
            if not isinstance(batch, list):
                raise ValueError("Unexpected API response (not a JSON array)")

            commits.extend(batch)
            # A short (or empty) page is the last one.
            if len(batch) < COMMITS_PER_PAGE:
                break
            page += 1
    except requests.exceptions.RequestException as e:
        print(f"Error: Failed to fetch commits from GitHub API: {e}")
        sys.exit(1)
    except ValueError as e:
        print(f"Error: {e}")
        sys.exit(1)

    return commits


def extract_unique_authors(commits: List[dict]) -> List[Tuple[str, str]]:
    """Extract the unique (name, email) author pairs from commit objects.

    Commits whose ``commit`` or ``author`` entry is missing or null, or whose
    name or email is empty, are ignored.

    Returns:
        The author pairs sorted by name, then email.
    """
    authors = set()

    for commit in commits:
        # "commit" and "author" may be present but null; treat that as missing.
        author = (commit.get("commit") or {}).get("author") or {}
        author_name = author.get("name", "")
        author_email = author.get("email", "")

        if author_name and author_email:
            authors.add((author_name, author_email))

    return sorted(authors)


def _parse_validated_authors(content: str) -> List[str]:
    """Return the "Name <email>" keys of a "Name <email>: githubhandle" listing."""
    validated_authors = []
    for line in content.split("\n"):
        line = line.strip()
        if line and not line.startswith("#") and ":" in line:
            author_part = line.partition(":")[0].strip()
            if author_part:
                validated_authors.append(author_part)
    return validated_authors


def fetch_validated_authors(
    private_repo_owner: str, private_repo_name: str, authors_file_path: str, token: str
) -> Optional[List[str]]:
    """Fetch validated authors from private repository.

    Args:
        private_repo_owner: Owner of the private repository.
        private_repo_name: Name of the private repository.
        authors_file_path: Path of the authors file inside that repository.
        token: Token with read access to the private repository.

    Returns:
        The "Name <email>" entries of the file, or ``None`` when the file is
        missing, empty, or cannot be fetched.
    """
    _require_requests()
    url = f"https://api.github.com/repos/{private_repo_owner}/{private_repo_name}/contents/{authors_file_path}"
    headers = {"Authorization": f"token {token}", "Accept": "application/vnd.github.v3.raw"}

    # Don't log private repository information for security
    print("Fetching validated authors from private repository...")

    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)

        if response.status_code == 404:
            print("Warning: Could not find validated authors file")
            return None

        response.raise_for_status()
        content = response.text
    except requests.exceptions.RequestException as e:
        # str(e) embeds the request URL (private repository owner, name and
        # file path), so only the error class and HTTP status are logged.
        status = getattr(getattr(e, "response", None), "status_code", None)
        detail = type(e).__name__ if status is None else f"{type(e).__name__} (HTTP {status})"
        print(f"Warning: Failed to fetch validated authors: {detail}")
        return None

    if not content:
        print("Warning: Could not decode validated authors file content")
        return None

    # Parse YAML format: "Name <email>: githubhandle"
    # Extract the "Name <email>" part before the colon
    validated_authors = _parse_validated_authors(content)

    # Only log count, never the actual names
    print(f"Found {len(validated_authors)} validated author(s) (list stored securely, not exposed)")
    return validated_authors


def check_authors_against_validated_list(
    pr_authors: List[Tuple[str, str]], validated_authors: List[str]
) -> Tuple[List[str], int]:
    """Check PR authors against validated list.

    Args:
        pr_authors: (name, email) pairs found in the PR.
        validated_authors: Validated entries in "Name <email>" form.

    Returns:
        A tuple of the "Name <email>" strings that are not in the validated
        list (in ``pr_authors`` order) and their count.
    """
    # Set membership keeps the check linear in the number of authors.
    validated = set(validated_authors)
    candidates = (f"{name} <{email}>" for name, email in pr_authors)
    unknown_authors = [author for author in candidates if author not in validated]

    return unknown_authors, len(unknown_authors)


def main():
    """Report the authors of a PR and validate them against the private list.

    Prints ``author_count``, ``authors_formatted`` (heredoc form),
    ``unknown_count`` and ``validation_status`` lines for the calling
    workflow. Exits with status 1 when ``--fail-on-unknown`` is given and
    unknown authors are found.
    """
    parser = argparse.ArgumentParser(description="Check PR authors against validated author list")
    parser.add_argument("--pr-number", type=int, required=True, help="Pull request number")
    parser.add_argument("--repo-owner", required=True, help="Repository owner")
    parser.add_argument("--repo-name", required=True, help="Repository name")
    # Tokens and private-repo settings fall back to the environment so they
    # need not be passed on the command line.
    parser.add_argument(
        "--github-token", default=os.environ.get("GITHUB_TOKEN"), help="GitHub token (default: $GITHUB_TOKEN)"
    )
    parser.add_argument(
        "--private-repo-owner",
        default=os.environ.get("PRIVATE_REPO_OWNER"),
        help="Private repository owner (default: $PRIVATE_REPO_OWNER)",
    )
    parser.add_argument(
        "--private-repo-name",
        default=os.environ.get("PRIVATE_REPO_NAME"),
        help="Private repository name (default: $PRIVATE_REPO_NAME)",
    )
    parser.add_argument(
        "--private-repo-token",
        default=os.environ.get("PRIVATE_REPO_TOKEN"),
        help="Private repository token (default: $PRIVATE_REPO_TOKEN)",
    )
    parser.add_argument(
        "--authors-file-path", default="data/gitlognames.yaml", help="Path to authors file in private repo"
    )
    parser.add_argument("--fail-on-unknown", action="store_true", help="Fail if unknown authors are found")

    args = parser.parse_args()
    if not args.github_token:
        parser.error("a GitHub token is required (--github-token or GITHUB_TOKEN)")

    # Get PR commits and extract authors
    commits = get_pr_commits(args.pr_number, args.repo_owner, args.repo_name, args.github_token)

    if not commits:
        print("No commits found in PR")
        print("author_count=0")
        print("unknown_count=0")
        print("validation_status=skipped")
        return

    pr_authors = extract_unique_authors(commits)
    author_count = len(pr_authors)

    print(f"Found {author_count} unique author(s):")
    for name, email in pr_authors:
        print(f"  - {name} <{email}>")

    # Output author count and authors list for GitHub Actions
    print(f"author_count={author_count}")

    # Output authors as formatted list for GitHub Actions step summary
    authors_formatted = "\n".join(f"{name} <{email}>" for name, email in pr_authors)
    print("authors_formatted<<EOF")
    print(authors_formatted)
    print("EOF")

    # Validation needs all three private-repository settings.
    if not (args.private_repo_owner and args.private_repo_name and args.private_repo_token):
        print("Private repository access not configured; skipping author validation")
        print("unknown_count=0")
        print("validation_status=skipped")
        return

    validated_authors = fetch_validated_authors(
        args.private_repo_owner, args.private_repo_name, args.authors_file_path, args.private_repo_token
    )
    if validated_authors is None:
        print("unknown_count=0")
        print("validation_status=skipped")
        return

    try:
        unknown_authors, unknown_count = check_authors_against_validated_list(pr_authors, validated_authors)
    finally:
        # Clean up sensitive data (only validated_authors, not unknown_authors since they're PR data)
        secure_cleanup(validated_authors)

    print(f"unknown_count={unknown_count}")

    if unknown_count > 0:
        print("validation_status=failure")
        print("❌ FAILURE: Found unknown author(s) in this PR")
        print(
            "The authors of this PR may be contributing for the first time or may have "
            "modified their author information. Author information requires review."
        )

        # Log unknown authors (these are PR authors, so it's safe to show them)
        print("Unknown authors in this PR:")
        for author in unknown_authors:
            print(f"  - {author}")

        if args.fail_on_unknown:
            print("Build failed due to unknown authors")
            sys.exit(1)
        else:
            print("Build continues despite unknown authors")
    else:
        print("validation_status=success")
        print("✅ SUCCESS: All PR authors are validated")


if __name__ == "__main__":
    main()