# Skip to content

# Enable summarization by subsets and groups #7499

# Enable summarization by subsets and groups

# Enable summarization by subsets and groups #7499

# Runs tests/catalog/test_preparation.py on pushes and pull requests to main,
# split across eight matrix jobs (one per `modulo` value).
name: Test Catalog Preparation

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Group runs by pull-request number (or by branch name for pushes); a newer
# run in the same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.event.pull_request.number || github.ref_name }}
  cancel-in-progress: true

jobs:
  preparation:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    env:
      OS: ubuntu-latest
      UNITXT_DEFAULT_VERBOSITY: error
      DATASETS_VERBOSITY: error
      HF_HUB_VERBOSITY: error
      HF_DATASETS_DISABLE_PROGRESS_BARS: "True"
      # Quoted so the values are unambiguously strings.
      HF_HUB_DOWNLOAD_TIMEOUT: "60"
      HF_HUB_ETAG_TIMEOUT: "60"
      TQDM_DISABLE: "True"
    strategy:
      matrix:
        # Eight entries; the "Run Tests" step rewrites `num_par` to 8, so keep
        # that number in sync with the length of this list.
        modulo: [0, 1, 2, 3, 4, 5, 6, 7]
    steps:
      - uses: actions/checkout@v5

      - uses: actions/setup-python@v5
        with:
          python-version: '3.10'
          cache: 'pip'

      - name: Install Dependencies
        run: bash utils/install.sh

      - uses: ./.github/actions/install-internal-pip
        with:
          ssh-private-key: ${{ secrets.LLMEVALKIT_SSH_KEY }}

      - name: Hugging Face Login
        env:
          # Hand the secret over through the environment instead of expanding
          # it into the script text.
          HUB_READ_TOKEN: ${{ secrets.UNITXT_READ_HUGGINGFACE_HUB_FOR_TESTS }}
        run: |
          # Up to five attempts, sleeping 2, 4, 8, 16, 32 seconds after a failure.
          # NOTE: if every attempt fails the step still exits 0, because the
          # last command executed is `sleep`; presumably login is meant to be
          # best-effort - confirm before making it fatal.
          for i in {1..5}; do
            huggingface-cli login --token "$HUB_READ_TOKEN" && break || sleep $((2 ** i));
          done

      - name: Run Tests
        run: |
          modulo="${{ matrix.modulo }}"
          echo "modulo=${modulo}" >> "$GITHUB_STEP_SUMMARY"
          # Patch the test module in place: raise `num_par` from 1 to 8 and
          # set `modulo` to this job's matrix value.
          sed -i \
            -e 's/^num_par = 1 /num_par = 8 /' \
            -e "s/^modulo = 0/modulo = ${modulo}/" \
            tests/catalog/test_preparation.py
          python -m unittest tests.catalog.test_preparation