# ci: add SynapseML-Internal compatibility check to OSS pipeline #2198
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Downloads the published SynapseML sitemap, extracts every <loc> URL into
# urls.txt, and runs lychee over that list. Triggered manually, on pushes to
# master, and on pull requests targeting master.
name: "Check Dead Links"

on:
  workflow_dispatch:
  push:
    branches: ["master"]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: ["master"]

# Least privilege: the job only checks out the repository and reads from it.
permissions:
  contents: read

jobs:
  scan_links:
    name: Scan Website for Dead Links
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Fetch sitemap URLs
        run: |
          set -euo pipefail
          SITEMAP_URL="https://microsoft.github.io/SynapseML/sitemap.xml"

          # Download sitemap to a file with retries and error reporting.
          # curl's exit status is captured rather than left to `set -e`, so a
          # transport failure (DNS, TLS, timeout) still reaches the ::error::
          # annotation below instead of aborting inside the assignment.
          CURL_STATUS=0
          HTTP_CODE=$(curl --silent --show-error --compressed \
            --retry 3 --retry-delay 5 \
            -o sitemap.xml -w '%{http_code}' \
            "$SITEMAP_URL") || CURL_STATUS=$?
          if [ "$CURL_STATUS" -ne 0 ] || [ "$HTTP_CODE" != "200" ]; then
            echo "::error::Failed to fetch sitemap from $SITEMAP_URL (HTTP $HTTP_CODE, curl exit $CURL_STATUS)"
            exit 1
          fi

          SITEMAP_SIZE=$(wc -c < sitemap.xml)
          echo "Downloaded sitemap: $SITEMAP_SIZE bytes"

          # Extract URLs using POSIX extended regex (portable, no PCRE dependency).
          # grep exits 1 when nothing matches; under `set -e -o pipefail` that
          # would abort the step before the empty-sitemap diagnostic below can
          # run. Treat exit 1 as "no matches" and let real grep errors (exit 2+)
          # still fail the pipeline.
          { grep -oE '<loc>[^<]+</loc>' sitemap.xml || [ "$?" -eq 1 ]; } \
            | sed 's/<\/*loc>//g' \
            > urls.txt

          URL_COUNT=$(wc -l < urls.txt)
          echo "Found $URL_COUNT URLs in sitemap"
          if [ "$URL_COUNT" -eq 0 ]; then
            echo "::error::Sitemap at $SITEMAP_URL contained no URLs (file was $SITEMAP_SIZE bytes)"
            # Show the start of the payload to help diagnose what was served.
            head -c 500 sitemap.xml
            exit 1
          fi

      - name: Scan for dead links
        uses: lycheeverse/lychee-action@v2
        with:
          # Folded scalar: the lines below are joined into one argument string.
          args: >-
            --no-progress
            --max-concurrency 8
            --max-retries 5
            --retry-wait-time 5
            --timeout 30
            --accept 100..=103,200..=299,503
            urls.txt