Add AI legal restrictions support #5
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Validates dataset directories whenever dataset content or the validator
# script changes.
name: Validate Datasets

on:
  # Pull requests are also re-checked when the workflow file at
  # .github/workflows/validate-datasets.yml is edited.
  pull_request:
    paths: ["datasets/**", "scripts/validate.py", ".github/workflows/validate-datasets.yml"]
  # Pushes are only watched on the master branch.
  push:
    branches: [master]
    paths: ["datasets/**", "scripts/validate.py"]
jobs:
  validate:
    runs-on: ubuntu-latest
    # Token scopes for this job: read-only repository contents, plus write
    # access to issues and pull requests (the comment steps further down call
    # github.rest.issues.createComment).
    permissions:
      pull-requests: write
      issues: write
      contents: read
    steps:
# Full-history checkout (fetch-depth 0) so later steps can diff against
# earlier commits.
- name: Checkout repository
  uses: actions/checkout@v4
  with:
    fetch-depth: 0

# Python 3.11 interpreter for the validation script.
- name: Set up Python
  uses: actions/setup-python@v5
  with:
    python-version: "3.11"

# Upgrade pip first, then install everything listed in requirements.txt.
- name: Install dependencies
  run: |
    python -m pip install --upgrade pip
    pip install -r requirements.txt
# Builds changed_datasets.txt with one "datasets/<name>" line per dataset
# directory touched by this change, and publishes the step output
# has_changes ('true' / 'false') for the steps that follow.
- name: Get changed datasets
  id: changed-datasets
  env:
    # Context values are handed over through the environment instead of being
    # interpolated into the script text, so they are never parsed as shell code.
    EVENT_NAME: ${{ github.event_name }}
    BASE_REF: ${{ github.base_ref }}
  run: |
    if [ "$EVENT_NAME" = "pull_request" ]; then
      # For PRs, compare against the base branch. No --depth limit here: the
      # checkout step already fetched full history and a depth-limited fetch
      # could shorten it again.
      git fetch origin "$BASE_REF"
      changed_files="$(git diff --name-only "origin/$BASE_REF...HEAD")"
    else
      # For pushes to master, check the last commit
      changed_files="$(git diff --name-only HEAD~1 HEAD)"
    fi
    # A failing git command above aborts the step instead of being silently
    # reported as "no changes".

    # Reduce file paths to "datasets/<name>" and keep only entries that are
    # directories in this checkout. Files sitting directly under datasets/ and
    # dataset directories deleted by the change cannot be validated, so they
    # are skipped rather than reported as validation failures.
    : > changed_datasets.txt
    printf '%s\n' "$changed_files" \
      | { grep '^datasets/' || true; } \
      | cut -d'/' -f1-2 \
      | sort -u \
      | while IFS= read -r candidate; do
          if [ -d "$candidate" ]; then
            printf '%s\n' "$candidate" >> changed_datasets.txt
          else
            echo "Skipping $candidate: not a dataset directory in this checkout"
          fi
        done

    if [ -s changed_datasets.txt ]; then
      echo "Changed datasets:"
      cat changed_datasets.txt
      echo "has_changes=true" >> "$GITHUB_OUTPUT"
    else
      echo "No dataset changes detected"
      echo "has_changes=false" >> "$GITHUB_OUTPUT"
    fi
# Runs scripts/validate.py once per line of changed_datasets.txt. Every entry
# is attempted even after a failure; the step fails at the end if any entry
# did not validate.
- name: Validate changed datasets
  if: steps.changed-datasets.outputs.has_changes == 'true'
  run: |
    any_failed=false
    rule="----------------------------------------"
    while IFS= read -r dataset_path; do
      printf '%s\n' "$rule" "Validating $dataset_path" "$rule"
      if ! python scripts/validate.py "$dataset_path"; then
        echo "❌ $dataset_path validation failed"
        any_failed=true
      else
        echo "✅ $dataset_path validation passed"
      fi
      echo ""
    done < changed_datasets.txt

    # Report the overall outcome only after every entry has been tried.
    if [ "$any_failed" != true ]; then
      echo "✅ All datasets validated successfully"
    else
      echo "❌ One or more datasets failed validation"
      exit 1
    fi
# On pushes to master, validates every directory directly under datasets/,
# regardless of what the push changed. All directories are attempted before
# the step reports failure.
- name: Validate all datasets (on push to master)
  if: github.event_name == 'push' && github.ref == 'refs/heads/master'
  run: |
    echo "Running full validation on master branch..."
    any_failed=false
    rule="----------------------------------------"
    for dataset_dir in datasets/*/; do
      # An unmatched glob stays literal; skip anything that is not a directory.
      [ -d "$dataset_dir" ] || continue
      printf '%s\n' "$rule" "Validating $dataset_dir" "$rule"
      if ! python scripts/validate.py "$dataset_dir"; then
        echo "❌ $dataset_dir validation failed"
        any_failed=true
      else
        echo "✅ $dataset_dir validation passed"
      fi
      echo ""
    done

    if [ "$any_failed" != true ]; then
      echo "✅ All datasets validated successfully"
    else
      echo "❌ One or more datasets failed validation"
      exit 1
    fi
# Posts a help comment on the pull request when an earlier step in this job
# failed. Posting is best-effort: an API error is logged and does not change
# the job result.
- name: Comment on PR
  if: failure() && github.event_name == 'pull_request'
  uses: actions/github-script@v7
  with:
    script: |
      // Comment text shown to the PR author, including how to reproduce locally.
      const failureNotice = '❌ Dataset validation failed. Please check the workflow logs for details and ensure:\n\n1. Your JSON is properly formatted (one entry per line)\n2. All entries conform to the schema\n3. Required files are present\n\nRun validation locally with:\n```bash\npython scripts/validate.py datasets/your-dataset-name\n```';
      try {
        const { owner, repo } = context.repo;
        const issue_number = context.issue.number;
        await github.rest.issues.createComment({ issue_number, owner, repo, body: failureNotice });
      } catch (error) {
        // Log and continue; the job is already marked as failed.
        console.log('Could not create comment:', error.message)
        console.log('This is likely due to permissions. The validation still failed - check the Actions tab for details.')
      }
# Posts a confirmation comment on the pull request when the job succeeded and
# at least one dataset was detected as changed. Posting is best-effort: an API
# error is logged and does not fail the job.
- name: Comment on PR success
  if: success() && github.event_name == 'pull_request' && steps.changed-datasets.outputs.has_changes == 'true'
  uses: actions/github-script@v7
  with:
    script: |
      const successNotice = '✅ All dataset validations passed! Your changes are ready for review.';
      try {
        const { owner, repo } = context.repo;
        const issue_number = context.issue.number;
        await github.rest.issues.createComment({ issue_number, owner, repo, body: successNotice });
      } catch (error) {
        // Log and continue; a missing comment must not turn a passing run red.
        console.log('Could not create comment:', error.message)
        console.log('This is likely due to permissions. The validation passed successfully!')
      }