Skip to content

Commit 3fce47c

Browse files
committed
Fix fileSec group sorting with non-default groups
1 parent 5e055b0 commit 3fce47c

File tree

2 files changed

+62
-17
lines changed

2 files changed

+62
-17
lines changed

metsrw/mets.py

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
from . import metadata
1313
from . import utils
1414

15-
# This package
16-
1715

1816
LOGGER = logging.getLogger(__name__)
1917

@@ -24,6 +22,17 @@
2422
)
2523
TRANSFORM_PREFIX = "TRANSFORM"
2624
TRANSFORM_PREFIX_LEN = len(TRANSFORM_PREFIX)
25+
DEFAULT_FILESEC_GROUPS_ORDER = [
26+
"original",
27+
"submissionDocumentation",
28+
"preservation",
29+
"service",
30+
"access",
31+
"license",
32+
"text/ocr",
33+
"metadata",
34+
"derivative",
35+
]
2736

2837

2938
class METSDocument:
@@ -312,28 +321,17 @@ def _filesec(self, files=None):
312321
return filesec
313322

314323
def _sort_filegrps(self, filegrps):
315-
uses_order = [
316-
"original",
317-
"submissionDocumentation",
318-
"preservation",
319-
"service",
320-
"access",
321-
"license",
322-
"text/ocr",
323-
"metadata",
324-
"derivative",
325-
]
326324
result = []
327-
count = len(filegrps)
325+
default_groups_count = len(DEFAULT_FILESEC_GROUPS_ORDER)
328326
for i, use in enumerate(filegrps.keys()):
329327
filegrp = filegrps[use]
330328
try:
331-
filegrp_position = uses_order.index(use)
329+
filegrp_position = DEFAULT_FILESEC_GROUPS_ORDER.index(use)
332330
except ValueError:
333-
filegrp_position = count + i
331+
filegrp_position = default_groups_count + i
334332
result.append((filegrp_position, filegrp))
335333

336-
return [v for i, v in sorted(result)]
334+
return [v for _, v in sorted(result, key=lambda i: i[0])]
337335

338336
def serialize(self, fully_qualified=True, normative_structmap=True):
339337
"""

tests/test_mets.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1169,3 +1169,50 @@ def test_dspace_filegrp_sorting_in_filesec(self):
11691169
def test_get_subsections_counts(mets_path, expected_counts):
11701170
mw = metsrw.METSDocument().fromfile(mets_path)
11711171
assert mw.get_subsections_counts() == expected_counts
1172+
1173+
1174+
@pytest.mark.parametrize(
1175+
"file_group_uses,expected_uses_order",
1176+
[
1177+
(
1178+
[
1179+
"unknown2",
1180+
"original",
1181+
"unknown1",
1182+
"text/ocr",
1183+
],
1184+
[
1185+
"original",
1186+
"text/ocr",
1187+
"unknown2",
1188+
"unknown1",
1189+
],
1190+
),
1191+
(
1192+
[
1193+
"unknown2",
1194+
"unknown1",
1195+
"original",
1196+
"unknown3",
1197+
],
1198+
[
1199+
"original",
1200+
"unknown2",
1201+
"unknown1",
1202+
"unknown3",
1203+
],
1204+
),
1205+
],
1206+
)
1207+
def test_filegrp_sorting_returns_non_default_groups(
1208+
file_group_uses, expected_uses_order
1209+
):
1210+
file_groups = {
1211+
use: etree.Element(metsrw.utils.lxmlns("mets") + "fileGrp", USE=use)
1212+
for use in file_group_uses
1213+
}
1214+
1215+
mw = metsrw.METSDocument()
1216+
result = mw._sort_filegrps(file_groups)
1217+
1218+
assert [g.attrib["USE"] for g in result] == expected_uses_order

0 commit comments

Comments
 (0)