Skip to content

Doc Search Scripts (All Stable, Preview Full) #6

Doc Search Scripts (All Stable, Preview Full)

Doc Search Scripts (All Stable, Preview Full) #6

name: Doc Search Scripts (All Stable, Preview Full)

# Manual trigger only: the workflow runs when dispatched with the inputs below.
on:
  workflow_dispatch:
    inputs:
      # Root of the preview site; the job normalizes it to end in exactly
      # one trailing slash before probing and crawling it.
      preview_base_url:
        description: 'Preview base URL'
        required: true
        default: 'https://docs.tidb.io/'
        type: string
      # Exported to the crawl scripts as CRAWL_LANG.
      language:
        description: 'Language scope for preview full prewarm'
        required: true
        default: 'en'
        type: choice
        options:
          - both
          - en
          - zh
      # Must answer HTTP 200 and contain a <urlset> or <sitemapindex> element,
      # otherwise the job aborts before crawling.
      preview_sitemap_url:
        description: 'Preview sitemap index URL'
        required: true
        default: 'https://docs.tidb.io/sitemap/sitemap-index.xml'
        type: string
      # Exported to the image-patching script as PATCH_SCRAPER_REPO.
      scraper_repo:
        description: 'Docsearch-scraper repo used to patch workflow image'
        required: true
        default: 'https://github.com/shczhen/docsearch-scraper.git'
        type: string
      # Exported to the image-patching script as PATCH_SCRAPER_REF.
      scraper_ref:
        description: 'Docsearch-scraper branch/ref used to patch workflow image'
        required: true
        default: 'incrementalCrawl'
        type: string
# Restrict the workflow token to read-only access to repository contents.
permissions:
  contents: read

# All runs share one concurrency group; a run that is already in progress is
# left to finish instead of being cancelled by a newer dispatch.
concurrency:
  group: docsearch-all-stable-full-preview
  cancel-in-progress: false
jobs:
  # Validates the preview site, builds a patched scraper image, then runs the
  # full preview crawl and scans its log for errors.
  build:
    runs-on: ubuntu-latest
    steps:
      # The docsearch scripts used below are taken from the 'doc-search' branch.
      - uses: actions/checkout@v3
        with:
          ref: 'doc-search'

      - name: Login to Container Registry
        uses: docker/login-action@v1
        with:
          registry: ${{ secrets.DOCKER_REGISTRY }}
          username: ${{ secrets.DOCKER_REGISTRY_USERNAME }}
          password: ${{ secrets.DOCKER_REGISTRY_PASSWORD }}

      - name: Run scripts
        # Inputs and secrets reach the script through the environment rather
        # than being expanded into the script text, so their contents are
        # never parsed as shell code.
        env:
          ALGOLIA_APPLICATION_ID: ${{ secrets.ALGOLIA_APPLICATION_ID }}
          ALGOLIA_API_KEY: ${{ secrets.ALGOLIA_API_KEY }}
          GITHUB_AUTH_TOKEN: ${{ secrets.GH_TOKEN }}
          BASE_DOCKER_REGISTRY: ${{ secrets.DOCKER_REGISTRY }}
          CRAWL_LANG: ${{ inputs.language }}
          INPUT_PREVIEW_BASE_URL: ${{ inputs.preview_base_url }}
          PREVIEW_SITEMAP_URL: ${{ inputs.preview_sitemap_url }}
          PATCH_SCRAPER_REPO: ${{ inputs.scraper_repo }}
          PATCH_SCRAPER_REF: ${{ inputs.scraper_ref }}
        run: |
          # Make a failing crawl fail the `crawl | tee` pipeline below.
          set -o pipefail
          cd docsearch

          # Credentials file written into the docsearch directory.
          touch .env
          {
            printf 'APPLICATION_ID=%s\n' "$ALGOLIA_APPLICATION_ID"
            printf 'API_KEY=%s\n' "$ALGOLIA_API_KEY"
            printf 'GITHUB_AUTH_TOKEN=%s\n' "$GITHUB_AUTH_TOKEN"
          } >> .env

          # Normalize the base URL to exactly one trailing slash.
          export CRAWL_LOCAL_URL="${INPUT_PREVIEW_BASE_URL%/}/"

          # Helper variables are only needed above; drop them so the scripts
          # invoked later see the same exported variables as before.
          unset ALGOLIA_APPLICATION_ID ALGOLIA_API_KEY INPUT_PREVIEW_BASE_URL

          echo "Run preview full prewarm against: $CRAWL_LOCAL_URL"
          echo "Use preview sitemap URL: $PREVIEW_SITEMAP_URL"

          # Each probe downloads the body to a file and greps the file.
          # Piping curl into `grep -q` under pipefail is unreliable: grep
          # exits on the first match and curl may then fail writing to the
          # closed pipe, flipping the result of the check.
          probe_dir="$(mktemp -d)"
          trap 'rm -rf "$probe_dir"' EXIT

          # Probe 1: the preview home page must return 200 and must not be
          # a 404 page served with a success status.
          preview_home_status="$(curl -sS -o "$probe_dir/home.html" -w "%{http_code}" "$CRAWL_LOCAL_URL")"
          if [ "$preview_home_status" != "200" ]; then
            echo "Preview base URL probe failed: $CRAWL_LOCAL_URL (status=$preview_home_status)"
            exit 1
          fi
          if grep -qi "<title>.*404" "$probe_dir/home.html"; then
            echo "Preview base URL resolved to a 404 page: $CRAWL_LOCAL_URL"
            exit 1
          fi

          # Probe 2: the sitemap (following redirects) must return 200 and
          # contain a <urlset> or <sitemapindex> element.
          preview_sitemap_probe="$(curl -sS -L -o "$probe_dir/sitemap.xml" -w "%{http_code} %{url_effective}" "$PREVIEW_SITEMAP_URL")"
          preview_sitemap_status="${preview_sitemap_probe%% *}"
          preview_sitemap_effective_url="${preview_sitemap_probe#* }"
          if [ "$preview_sitemap_status" != "200" ]; then
            echo "Preview sitemap probe failed: $PREVIEW_SITEMAP_URL (status=$preview_sitemap_status, effective=$preview_sitemap_effective_url)"
            exit 1
          fi
          if ! grep -Eqi '<(urlset|sitemapindex)' "$probe_dir/sitemap.xml"; then
            echo "Preview sitemap does not look like XML sitemap: $PREVIEW_SITEMAP_URL"
            exit 1
          fi

          # Build the patched scraper image. BASE_DOCKER_REGISTRY,
          # PATCH_SCRAPER_REPO and PATCH_SCRAPER_REF come from the step env.
          export PATCHED_DOCKER_REGISTRY="local"
          export PATCH_IMAGE_ENV_FILE="/tmp/docsearch-patched-image.env"
          ./all-stable/scripts/prepare-patched-scraper-image.sh "$(pwd)/all-stable"

          # NOTE(review): the sourced file presumably defines
          # PATCHED_SCRAPER_IMAGE (echoed below) and may also override
          # PATCHED_DOCKER_REGISTRY - confirm against the prepare script.
          . "$PATCH_IMAGE_ENV_FILE"
          export DOCKER_REGISTRY="$PATCHED_DOCKER_REGISTRY"
          echo "Run preview full prewarm with patched image: $PATCHED_SCRAPER_IMAGE"

          # Run the crawl, keeping a copy of its combined output for the
          # error check that follows.
          ./all-stable/scripts/crawl-full-preview.sh "$(pwd)/all-stable" 2>&1 | tee /tmp/docsearch-all-stable-full-preview.log
          ./all-stable/scripts/check-crawl-errors.sh /tmp/docsearch-all-stable-full-preview.log