Skip to content

prerelease: minor version #252

prerelease: minor version

prerelease: minor version #252

Workflow file for this run

# Workflow that runs the data integrity test suites for Riksdagen Records.
name: "Riksdagen Records: data integrity tests"

on:
  # Pushes start a run only when something under data/ or test/.run changes.
  push:
    paths:
      - data/**
      - test/.run
  # Pull requests start a run when they target the main or dev branch.
  pull_request:
    branches:
      - 'main'
      - 'dev'
  # Allows the workflow to be started manually.
  workflow_dispatch:
# Each job runs one unittest module from the `test` package on its own runner.
# Every job starts with the same preparation steps: actions/checkout fetches
# the repository (which also makes the repo-local actions under
# ./.github/actions available), followed by the local checkout, setup-python,
# free-up-space and install-dependencies actions.
jobs:
  # Runs test.schemas.
  schemas:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so the version stays a string (an unquoted 3.10 would
        # be parsed as the float 3.1).
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout
      - uses: ./.github/actions/setup-python
        with:
          python-version: ${{ matrix.python-version }}
      - uses: ./.github/actions/free-up-space
      - uses: ./.github/actions/install-dependencies
      - name: Validate XML files against ParlaClarin schema
        run: |
          python -m unittest test.schemas

  # Runs test.altocheck; the KBLAB credentials are passed in from repository
  # secrets.
  alto-comparison:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout
      - uses: ./.github/actions/setup-python
        with:
          python-version: ${{ matrix.python-version }}
      - uses: ./.github/actions/free-up-space
      - uses: ./.github/actions/install-dependencies
      - name: Make sure the records do not differ too much from the OCR result
        env:  # authentication
          KBLAB_USERNAME: ${{ secrets.KBLAB_USERNAME }}
          KBLAB_PASSWORD: ${{ secrets.KBLAB_PASSWORD }}
        run: |
          python -m unittest test.altocheck

  # Runs test.empty-speech.
  empty-speech:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout
      - uses: ./.github/actions/setup-python
        with:
          python-version: ${{ matrix.python-version }}
      - uses: ./.github/actions/free-up-space
      - uses: ./.github/actions/install-dependencies
      - name: Test there are no empty u or seg elements
        run: |
          python -m unittest test.empty-speech

  # Runs test.mp against the riksdagen-persons metadata repository.
  mp:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout
      - uses: ./.github/actions/setup-python
        with:
          python-version: ${{ matrix.python-version }}
      - uses: ./.github/actions/free-up-space
      - uses: ./.github/actions/install-dependencies
      - name: Download metadata
        # Clones straight onto the main branch instead of cloning and then
        # checking main out in a second command.
        run: |
          git clone --branch main https://github.com/swerik-project/riksdagen-persons.git
      - name: Test speaker data integrity
        # Points the tests at the data directory of the clone made above
        # (relative to the workspace root).
        env:
          METADATA_PATH: riksdagen-persons/data
        run: |
          python -m unittest test.mp

  # Runs test.next_prev.
  next-prev:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout
      - uses: ./.github/actions/setup-python
        with:
          python-version: ${{ matrix.python-version }}
      - uses: ./.github/actions/free-up-space
      - uses: ./.github/actions/install-dependencies
      - name: Check that next/prev tagging is coherent
        run: |
          python -m unittest test.next_prev

  # Runs test.paragraph_has_id.
  paragraph-ids:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout
      - uses: ./.github/actions/setup-python
        with:
          python-version: ${{ matrix.python-version }}
      - uses: ./.github/actions/free-up-space
      - uses: ./.github/actions/install-dependencies
      - name: Check that all elements with text have IDs
        run: |
          python -m unittest test.paragraph_has_id

  # Runs test.records-in-sequence.
  records-in-sequence:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout
      - uses: ./.github/actions/setup-python
        with:
          python-version: ${{ matrix.python-version }}
      - uses: ./.github/actions/free-up-space
      - uses: ./.github/actions/install-dependencies
      - name: Check that there are no duplicate or missing protocol numbers between first and last
        run: |
          python -m unittest test.records-in-sequence

  # Runs test.test_doc-id.
  doc-id-page-nr-filename:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/checkout
      - uses: ./.github/actions/setup-python
        with:
          python-version: ${{ matrix.python-version }}
      - uses: ./.github/actions/free-up-space
      - uses: ./.github/actions/install-dependencies
      - name: check that Doc IDs appear in TEI elem, doc ID == filename, doc ID used in facs
        run: |
          python -m unittest test.test_doc-id