# Daily Build Embeddings #280

# Runs `doc-builder embeddings` for a list of documentation repositories
# once a day, or on demand from the Actions tab.
name: Daily Build Embeddings

env:
  # Quoted so that every YAML loader yields the string "yes" rather than a
  # boolean.
  DIFFUSERS_SLOW_IMPORT: "yes"

on:
  schedule:
    - cron: "5 7 * * *" # every day at 07:05 (GitHub evaluates cron in UTC)
  # to run this workflow manually from the Actions tab
  workflow_dispatch:

concurrency:
  # Runs are grouped per workflow and ref; a run that is already in progress
  # is not cancelled when a newer one is queued.
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: false
jobs:
  # Builds embeddings for every documentation source listed in the matrix,
  # one entry at a time.
  matrix-job:
    runs-on: ubuntu-latest
    container: huggingface/transformers-doc-builder
    timeout-minutes: 360 # Set timeout to 6 hours
    strategy:
      max-parallel: 1 # run sequentially
      matrix:
        # Keys used by the steps below:
        #   repo_id               "owner/name" of the repository to check out
        #   doc_folder            docs directory inside that repository
        #   package_name          optional; name passed to doc-builder
        #                         (falls back to the repository name)
        #   package_path          optional; sub-directory to pip-install from
        #   is_not_python_module  optional; "true" skips the pip install and
        #                         passes --not_python_module to doc-builder
        #   pre_command           optional; shell command run before the build
        include:
          - repo_id: huggingface/tokenizers
            doc_folder: docs/source-doc-builder
            package_path: bindings/python
          - repo_id: huggingface/diffusers
            doc_folder: docs/source/en
          - repo_id: huggingface/accelerate
            doc_folder: docs/source
          - repo_id: huggingface/huggingface_hub
            doc_folder: docs/source/en
          - repo_id: huggingface/transformers
            doc_folder: docs/source/en
          - repo_id: huggingface/hub-docs
            doc_folder: docs/hub
            package_name: hub
            is_not_python_module: true
          - repo_id: huggingface/huggingface.js
            doc_folder: docs
            is_not_python_module: true
            pre_command: >-
              npm install -g corepack@latest &&
              corepack enable &&
              cd huggingface.js &&
              pnpm install &&
              pnpm -r build &&
              pnpm --filter doc-internal start
          - repo_id: huggingface/transformers.js
            doc_folder: docs/source
            is_not_python_module: true
          - repo_id: huggingface/smolagents
            doc_folder: docs/source/en
          - repo_id: huggingface/peft
            doc_folder: docs/source
          - repo_id: huggingface/trl
            doc_folder: docs/source
          - repo_id: bitsandbytes-foundation/bitsandbytes
            doc_folder: docs/source
          - repo_id: huggingface/lerobot
            doc_folder: docs/source
          - repo_id: huggingface/pytorch-image-models
            doc_folder: hfdocs/source
            package_name: timm
          - repo_id: huggingface/hub-docs
            doc_folder: docs/inference-providers
            package_name: inference-providers
            is_not_python_module: true
          - repo_id: huggingface/safetensors
            doc_folder: docs/source
            package_path: bindings/python
          - repo_id: huggingface/hf-endpoints-documentation
            doc_folder: docs/source
            package_name: inference-endpoints
            is_not_python_module: true
          - repo_id: huggingface/dataset-viewer
            doc_folder: docs/source
            package_name: dataset-viewer
            is_not_python_module: true
    steps:
      - name: Setup REPO_NAME
        shell: bash
        env:
          REPO_ID: ${{ matrix.repo_id }}
        run: |
          # Drop the "owner/" prefix, e.g. huggingface/diffusers -> diffusers,
          # and export it for the following steps.
          echo "REPO_NAME=${REPO_ID#*/}" >> "$GITHUB_ENV"

      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          repository: ${{ matrix.repo_id }}
          path: ${{ github.workspace }}/${{ env.REPO_NAME }}

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: "20"

      - name: Install libgl1
        # Refresh the package index first; container images often ship with
        # stale or empty apt lists.
        run: apt-get update && apt-get install -y libgl1

      - name: Export PIP_OR_UV ('pip' or 'uv pip')
        run: |
          # An empty job.container means the job is not running in a container.
          if [ -z "${{ job.container }}" ]
          then
            echo "PIP_OR_UV=uv pip" >> "$GITHUB_ENV"
          else
            echo "PIP_OR_UV=pip" >> "$GITHUB_ENV"
          fi

      - name: Setup environment
        shell: bash
        env:
          PACKAGE_PATH: ${{ matrix.package_path }}
        run: |
          if [[ "${{ matrix.is_not_python_module }}" != "true" ]]; then
            # Install from the repository root, or from PACKAGE_PATH when the
            # matrix entry sets one. The subshell keeps the outer working
            # directory unchanged. $PIP_OR_UV is intentionally unquoted so
            # that "uv pip" splits into two words.
            (
              cd "$REPO_NAME/$PACKAGE_PATH"
              $PIP_OR_UV install ".[dev]"
              $PIP_OR_UV install --force-reinstall numpy==1.26.4
            )
          fi
          # Always install doc-builder from a fresh clone of its main branch.
          rm -rf doc-builder
          rm -rf .git
          git clone https://github.com/huggingface/doc-builder.git
          cd doc-builder
          git fetch
          git checkout main
          $PIP_OR_UV install .

      - name: Run pre-command
        shell: bash
        env:
          # Passed through the environment rather than spliced into the
          # script text.
          PRE_COMMAND: ${{ matrix.pre_command }}
        run: |
          if [[ -n "$PRE_COMMAND" ]]; then
            bash -c "$PRE_COMMAND"
          fi

      - name: Build embeddings
        shell: bash
        env:
          PACKAGE_NAME: ${{ matrix.package_name || env.REPO_NAME }}
          DOC_PATH: ${{ env.REPO_NAME }}/${{ matrix.doc_folder }}
          # Secrets go through the environment so they are never expanded
          # into the generated script.
          HF_IE_TOKEN: ${{ secrets.HF_IE_TOKEN }}
          MEILISEARCH_KEY: ${{ secrets.MEILISEARCH_KEY }}
        run: |
          echo "Building docs for $PACKAGE_NAME"
          flags=()
          if [[ "${{ matrix.is_not_python_module }}" == "true" ]]; then
            flags+=(--not_python_module)
          fi
          doc-builder embeddings "$PACKAGE_NAME" "$DOC_PATH" \
            --hf_ie_name docs-embed-bge-base-en-v1-5 \
            --hf_ie_namespace huggingface \
            --hf_ie_token "$HF_IE_TOKEN" \
            --meilisearch_key "$MEILISEARCH_KEY" \
            "${flags[@]}"

  # Runs `doc-builder add-gradio-docs` from this repository's checkout.
  gradio-job:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout doc-builder
        uses: actions/checkout@v4

      - name: Install doc-builder
        run: pip install ".[dev]"

      - name: Add gradio docs to meilisearch
        env:
          HF_IE_TOKEN: ${{ secrets.HF_IE_TOKEN }}
          MEILISEARCH_KEY: ${{ secrets.MEILISEARCH_KEY }}
        run: |
          doc-builder add-gradio-docs \
            --hf_ie_name docs-embed-bge-base-en-v1-5 \
            --hf_ie_namespace huggingface \
            --hf_ie_token "$HF_IE_TOKEN" \
            --meilisearch_key "$MEILISEARCH_KEY"

  # Runs `doc-builder meilisearch-clean` once matrix-job has finished.
  # NOTE(review): gradio-job is not listed in `needs`, so this job can start
  # while gradio-job is still running - confirm that this is intended.
  cleanup-job:
    needs: matrix-job
    runs-on: ubuntu-latest
    if: always() # This ensures that the cleanup job runs regardless of the result of matrix-job
    steps:
      - name: Checkout doc-builder
        uses: actions/checkout@v4

      - name: Install doc-builder
        run: pip install ".[dev]"

      - name: Success Cleanup
        if: needs.matrix-job.result == 'success' # Runs if all matrix jobs succeeded
        env:
          MEILISEARCH_KEY: ${{ secrets.MEILISEARCH_KEY }}
        run: doc-builder meilisearch-clean --meilisearch_key "$MEILISEARCH_KEY" --swap

      - name: Failure Cleanup
        if: needs.matrix-job.result == 'failure' # Runs if any matrix job failed
        env:
          MEILISEARCH_KEY: ${{ secrets.MEILISEARCH_KEY }}
        run: doc-builder meilisearch-clean --meilisearch_key "$MEILISEARCH_KEY"