# Add 2023/24 and 2024/25 protocols to the corpus (#227)
# Workflow file for this run.
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review it, open the file in an editor that reveals hidden Unicode characters.
# (See GitHub's documentation on bidirectional Unicode characters to learn more.)
name: "Riksdagen Records: data integrity tests"

# Triggers:
#   * push          - only when something under data/ or the file test/.run changes
#   * pull_request  - only for pull requests targeting the main or dev branch
#   * manual        - via the "Run workflow" button (workflow_dispatch)
on:
  push:
    paths:
      - 'data/**'
      - 'test/.run'
  pull_request:
    branches:
      - 'main'
      - 'dev'
  workflow_dispatch:

# Least privilege for GITHUB_TOKEN: the steps visible in this workflow only
# check out the repository and run tests.
# NOTE(review): the local composite actions under ./.github/actions are not
# visible here - confirm none of them needs a write scope.
permissions:
  contents: read
# Every job below follows the same recipe:
#   1. actions/checkout@v4 fetches the repository; this must come first so that
#      the local composite actions referenced as ./.github/actions/* exist in
#      the workspace.
#   2. The local checkout / setup-python / free-up-space / install-dependencies
#      actions prepare the runner (their internals are defined elsewhere in the
#      repository).
#   3. One `python -m unittest <module>` call runs a single integrity test.
#
# The Python version is quoted so YAML keeps it as a string; an unquoted value
# is parsed as a float, which would silently turn e.g. 3.10 into 3.1.
# NOTE(review): Python 3.8 is past its upstream end-of-life - consider bumping.
jobs:
  schemas:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout
      - uses: ./.github/actions/setup-python
        with:
          python-version: ${{ matrix.python-version }}
      - uses: ./.github/actions/free-up-space
      - uses: ./.github/actions/install-dependencies
      - name: Validate XML files against ParlaClarin schema
        run: |
          python -m unittest test.schemas

  alto-comparison:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout
      - uses: ./.github/actions/setup-python
        with:
          python-version: ${{ matrix.python-version }}
      - uses: ./.github/actions/free-up-space
      - uses: ./.github/actions/install-dependencies
      - name: Make sure the records do not differ too much from the OCR result
        # Authentication credentials are passed to the test via environment.
        # NOTE(review): repository secrets are not exposed to pull requests
        # from forks - confirm the test copes with empty credentials.
        env:
          KBLAB_USERNAME: ${{ secrets.KBLAB_USERNAME }}
          KBLAB_PASSWORD: ${{ secrets.KBLAB_PASSWORD }}
        run: |
          python -m unittest test.altocheck

  empty-speech:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout
      - uses: ./.github/actions/setup-python
        with:
          python-version: ${{ matrix.python-version }}
      - uses: ./.github/actions/free-up-space
      - uses: ./.github/actions/install-dependencies
      - name: Test there are no empty u or seg elements
        run: |
          python -m unittest test.empty-speech

  mp:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout
      - uses: ./.github/actions/setup-python
        with:
          python-version: ${{ matrix.python-version }}
      - uses: ./.github/actions/free-up-space
      - uses: ./.github/actions/install-dependencies
      - name: Download metadata
        # Clone the persons metadata repository with its main branch checked
        # out, into ./riksdagen-persons inside the workspace.
        run: |
          git clone --branch main https://github.com/swerik-project/riksdagen-persons.git
      - name: Test speaker data integrity
        # The test reads the metadata location from METADATA_PATH.
        env:
          METADATA_PATH: "riksdagen-persons/data"
        run: |
          python -m unittest test.mp

  next-prev:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout
      - uses: ./.github/actions/setup-python
        with:
          python-version: ${{ matrix.python-version }}
      - uses: ./.github/actions/free-up-space
      - uses: ./.github/actions/install-dependencies
      - name: Check that next/prev tagging is coherent
        run: |
          python -m unittest test.next_prev

  paragraph-ids:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout
      - uses: ./.github/actions/setup-python
        with:
          python-version: ${{ matrix.python-version }}
      - uses: ./.github/actions/free-up-space
      - uses: ./.github/actions/install-dependencies
      - name: Check that all elements with text have IDs
        run: |
          python -m unittest test.paragraph_has_id

  records-in-sequence:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout
      - uses: ./.github/actions/setup-python
        with:
          python-version: ${{ matrix.python-version }}
      - uses: ./.github/actions/free-up-space
      - uses: ./.github/actions/install-dependencies
      - name: Check that there are no duplicate or missing protocol numbers between first and last
        run: |
          python -m unittest test.records-in-sequence

  doc-id-page-nr-filename:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout
      - uses: ./.github/actions/setup-python
        with:
          python-version: ${{ matrix.python-version }}
      - uses: ./.github/actions/free-up-space
      - uses: ./.github/actions/install-dependencies
      - name: check that Doc IDs appear in TEI elem, doc ID == filename, doc ID used in facs
        run: |
          python -m unittest test.test_doc-id