Skip to content

Commit 3f77c7c

Browse files
authored
Merge pull request #208 from MITLibraries/TIMX-355-control-field-index
Address literary form control field bug
2 parents e00c46e + 775458b commit 3f77c7c

File tree

3 files changed

+21
-1
lines changed

3 files changed

+21
-1
lines changed

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ line-length = 90
44
[tool.mypy]
55
disallow_untyped_calls = true
66
disallow_untyped_defs = true
7-
exclude = ["tests/"]
7+
exclude = ["tests/", "output/"]
88

99
[tool.pytest.ini_options]
1010
log_level = "INFO"

tests/sources/xml/test_marc.py

+10
Original file line numberDiff line numberDiff line change
@@ -1349,6 +1349,16 @@ def test_get_literary_form_transforms_correctly_if_char_positions_blank():
13491349
assert Marc.get_literary_form(source_record) is None
13501350

13511351

1352+
def test_get_literary_form_returns_none_if_control_field_too_short(caplog):
1353+
caplog.set_level("DEBUG")
1354+
source_record = create_marc_source_record_stub(
1355+
control_field_insert='<controlfield tag="008">220613s '
1356+
"|||||o||||||||||||d</controlfield>",
1357+
)
1358+
assert Marc.get_literary_form(source_record) is None
1359+
assert "could not parse literary form" in caplog.text
1360+
1361+
13521362
def test_get_links_success():
13531363
source_record = create_marc_source_record_stub(
13541364
datafield_insert=(

transmogrifier/sources/xml/marc.py

+10
Original file line numberDiff line numberDiff line change
@@ -586,10 +586,20 @@ def get_literary_form(cls, source_record: Tag) -> str | None:
586586
and Leader/07 (Bibliographic level) contains code
587587
a (Monographic component part), c (Collection), d (Subunit),
588588
or m (Monograph).
589+
590+
If control field 008 is shorter than 34 characters, return None as we cannot
591+
accurately determine the literary form.
589592
"""
590593
leader_field = cls._get_leader_field(source_record)
591594
control_field = cls._get_control_field(source_record)
592595
if leader_field[6] in "at" and leader_field[7] in "acdm":
596+
if len(control_field) <= 33: # noqa: PLR2004
597+
message = (
598+
f"Record ID '{cls.get_source_record_id(source_record)}' has less than "
599+
"34 characters for control field 008, could not parse literary form."
600+
)
601+
logger.debug(message)
602+
return None
593603
if control_field[33] in "0se":
594604
return "Nonfiction"
595605
return "Fiction"

0 commit comments

Comments
 (0)