Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,17 @@
[![PyPI - Version](https://img.shields.io/pypi/v/mets-mods2tei?logo=PyPI)](https://pypi.org/project/mets-mods2tei/)
[![GitHub License](https://img.shields.io/github/license/slub/mets-mods2tei)](LICENSE)

Convert bibliographic meta data in METS/MODS format to TEI headers and optionally serialize linked ALTO-encoded OCR to TEI text.
Convert bibliographic metadata in METS/MODS format to TEI headers and optionally serialize linked ALTO-encoded OCR to TEI text.

## Background

[MODS](http://www.loc.gov/standards/mods/) is the de-facto standard for encoding bibliographic
meta data in libraries. It is usually included as a separate section into
metadata in libraries. It is usually included as a separate section into
[METS](http://www.loc.gov/standards/mets/) XML files. Physical and logical structure of a document
are expressed in terms of structural mappings (`structMap` elements).

[TEI](https://tei-c.org/) is the de-facto standard for representing digital text for research
purposes. It usually includes detailed bibliographic meta data in its
purposes. It usually includes detailed bibliographic metadata in its
[header](https://tei-c.org/release/doc/tei-p5-doc/de/html/ref-teiHeader.html).

Since these standards contain a considerable number of degrees of freedom, the conversion uses
Expand Down Expand Up @@ -213,8 +213,6 @@ Options:
-u, --url-prefix TEXT URL prefix to add to path before storing references
(or else keep local file refs)
--help Show this message and exit.


```

</p></details>
Expand Down
2 changes: 1 addition & 1 deletion mets_mods2tei/scripts/mets_mods2tei.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def cli(mets, output, ocr, text_group, img_group, add_refs, log_level):
if `--add-refs` contains `page`, then reference the corresponding
base image files (by file name) from `--img-group`. Likewise,
if `--add-refs` contains `line`, then reference the corresponding
textline segments (by XML ID) from `--text-group`.
text line segments (by XML ID) from `--text-group`.

Output XML to `--output` (use '-' for stdout), log to stderr.
"""
Expand Down
14 changes: 7 additions & 7 deletions tests/test_alto.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,38 +29,38 @@ def datadir(tmpdir, request):

def test_constructor():
"""
Test the creation of an empty Alto instance
Test the creation of an empty Alto instance.
"""
alto = Alto()
assert(alto.tree is None)

def test_reading_local_file(datadir):
"""
Test reading a local alto file
Test reading a local ALTO file.
"""
with open(datadir.join('test_alto.xml'), 'rb') as f:
alto = Alto.read(f)
assert(alto.tree is not None)

def test_loading_local_file(datadir):
"""
Test loading a local alto file
Test loading a local ALTO file.
"""
with open(datadir.join('test_alto.xml'), 'rb') as f:
alto = Alto.read(f)
assert(alto.tree is not None)

def test_text_block_extraction(datadir):
"""
Test the extraction of text blocks
Test the extraction of text blocks.
"""
with open(datadir.join('test_alto.xml'), 'rb') as f:
alto = Alto.read(f)
assert(len(list(alto.get_text_blocks())) == 1)

def test_text_line_extraction(datadir):
"""
Test the extraction of text lines
Test the extraction of text lines.
"""
with open(datadir.join('test_alto.xml'), 'rb') as f:
alto = Alto.read(f)
Expand All @@ -69,7 +69,7 @@ def test_text_line_extraction(datadir):

def test_text_line_text_extraction(datadir):
"""
Test the extraction of text from text lines
Test the extraction of text from text lines.
"""
with open(datadir.join('test_alto.xml'), 'rb') as f:
alto = Alto.read(f)
Expand All @@ -79,7 +79,7 @@ def test_text_line_text_extraction(datadir):

def test_index_assingment(datadir):
"""
Test the identifcation of the most likely insertion index
Test the identification of the most likely insertion index.
"""
with open(datadir.join('test_alto.xml'), 'rb') as f:
alto = Alto.read(f)
Expand Down
4 changes: 2 additions & 2 deletions tests/test_iso15924.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ def test_constructor():

def test_existing_script():
"""
Test requesting the script name for an existing code
Test requesting the script name for an existing code.
"""
iso = Iso15924()
assert(iso.get('Latf') == "Latin (Fraktur variant)")

def test_non_existing_script():
"""
Test requesting the script name for a non-existing code
Test requesting the script name for a non-existing code.
"""
iso = Iso15924()
assert(iso.get('kkk') == "Unknown")
22 changes: 11 additions & 11 deletions tests/test_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,30 +29,30 @@ def datadir(tmpdir, request):

def test_constructor():
"""
Test the creation of an empty Mets instance
Test the creation of an empty METS instance.
"""
mets = Mets()
assert mets.mets is None

def test_reading_local_file(datadir):
"""
Test reading a local mets file
Test reading a local METS file.
"""
f = open(datadir.join('test_mets.xml'))
mets = Mets.read(f)
assert mets.mets is not None

def test_loading_local_file(datadir):
"""
Test loading a local mets file
Test loading a local METS file.
"""
f = open(datadir.join('test_mets.xml'))
mets = Mets.from_file(f)
assert mets.mets is not None

def test_intermediate_file_loading(datadir):
"""
Test loading a local mets file
Test loading a local METS file.
"""
f = open(datadir.join('test_mets.xml'))
mets = Mets()
Expand All @@ -61,7 +61,7 @@ def test_intermediate_file_loading(datadir):

def test_fulltext_group_name(subtests, datadir):
"""
Test getting and setting the full text group name
Test getting and setting the full text group name.
"""
f = open(datadir.join('test_mets.xml'))
mets = Mets.read(f)
Expand All @@ -75,7 +75,7 @@ def test_fulltext_group_name(subtests, datadir):

def test_mappings(subtests, datadir):
"""
Test the correct interpretation of the structural linking
Test the correct interpretation of the structural linking.
"""
f = open(datadir.join('test_mets.xml'))
mets = Mets.read(f)
Expand All @@ -91,7 +91,7 @@ def test_mappings(subtests, datadir):

def test_data_assignment(subtests, datadir):
"""
Test the correct assignment of meta data
Test the correct assignment of metadata.
"""
f = open(datadir.join('test_mets.xml'))
mets = Mets.read(f)
Expand Down Expand Up @@ -145,7 +145,7 @@ def test_data_assignment(subtests, datadir):

def test_mappings_only_phys(subtests, datadir):
"""
Test the correct interpretation of the structural linking
Test the correct interpretation of the structural linking.
"""
f = open(datadir.join('test_mets_nodiv.xml'))
mets = Mets()
Expand All @@ -163,9 +163,9 @@ def test_mappings_only_phys(subtests, datadir):
assert mets.get_img('PHYS_0005') == 'https://digital.slub-dresden.de/data/kitodo/BurgAbha_1852685697/BurgAbha_1852685697_tif/jpegs/00000005.tif.original.jpg'

def test_mappings_only_phys_local(subtests, datadir):
'''
Test the correct interpretation of local file references
'''
"""
Test the correct interpretation of local file references.
"""
f = open(datadir.join('test_mets_nodiv_local.xml'))
mets = Mets()
mets.image_group_name = 'ORIGINAL'
Expand Down
18 changes: 9 additions & 9 deletions tests/test_tei.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@ def datadir(tmpdir, request):

def test_constructor():
"""
Test the creation of an empty Tei instance
Test the creation of an empty TEI instance.
"""
tei = Tei()
assert tei.tree is not None

def test_reading_local_file(subtests, datadir):
"""
Test reading from a local mets file
Test reading from a local METS file.
"""
f = open(datadir.join('test_mets.xml'))
mets = Mets.read(f)
Expand All @@ -57,9 +57,9 @@ def test_reading_local_file(subtests, datadir):
assert len(tei.tree.xpath('/tei:TEI/tei:text/tei:body//tei:div//tei:p//tei:lb', namespaces=NS)) > 8000

def test_reading_local_file_local_ocr(subtests, datadir):
'''
Test reading from a local mets file, referencing local alto files
'''
"""
Test reading from a local METS file, referencing local ALTO files.
"""
f = open(datadir.join('test_mets_nodiv_local.xml'))
mets = Mets.read(f)
tei = Tei()
Expand All @@ -71,9 +71,9 @@ def test_reading_local_file_local_ocr(subtests, datadir):
assert len(tei.tree.xpath('/tei:TEI/tei:text/tei:body//tei:div//tei:p//tei:lb', namespaces=NS)) > 800

def test_reading_remote_url(tmpdir):
'''
Test reading from a remote mets link
'''
"""
Test reading from a remote METS link.
"""
from urllib.request import urlopen
mets = Mets()
mets.fromfile(urlopen("https://digital.slub-dresden.de/oai/?verb=GetRecord&metadataPrefix=mets"
Expand Down Expand Up @@ -102,7 +102,7 @@ def test_string_dumping():

def test_data_assignment(subtests):
"""
Test the correct assignment of meta data
Test the correct assignment of metadata.
"""
tei = Tei()

Expand Down