# fix: Eliminate RDD usage across SynapseML for Spark 4.0 compatibility #2067

	name: "Check Dead Links"

	on:
	workflow_dispatch:
	push:
	branches: [ "master" ]
	pull_request:
	# The branches below must be a subset of the branches above
	branches: [ "master" ]

	jobs:
	scan_links:
	name: Scan Website for Dead Links
	runs-on: ubuntu-latest

	steps:
	- name: Checkout code
	uses: actions/checkout@v4

	- name: Fetch sitemap URLs
	run: \|
	curl -s https://microsoft.github.io/SynapseML/sitemap.xml \
	\| grep -oP '<loc>[^<]+</loc>' \
	\| sed 's/<\/?loc>//g' \
	> urls.txt
	echo "Found $(wc -l < urls.txt) URLs in sitemap"

	- name: Scan for dead links
	uses: lycheeverse/lychee-action@v2
	with:
	args: >-
	--no-progress
	--max-concurrency 8
	--max-retries 5
	--retry-wait-time 5
	--timeout 30
	--accept '100..=103,200..=299,503'
	urls.txt

# Provide feedback