Skip to content

Commit c78b888

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 2e94293 commit c78b888

File tree

1 file changed

+22
-5
lines changed

1 file changed

+22
-5
lines changed

tags/literary_form/migrate_subjects.py

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
# Load mappings
3636
# ---------------------------------------------------------------------------
3737

38+
3839
def load_mapping(name: str) -> dict[str, str]:
3940
"""Load a JSON mapping file from scripts/mappings/."""
4041
path = MAPPINGS_DIR / f"{name}.json"
@@ -85,6 +86,7 @@ def is_classification_code(s: str) -> bool:
8586
# Core classifier
8687
# ---------------------------------------------------------------------------
8788

89+
8890
class SubjectClassifier:
8991
def __init__(self):
9092
self.literary_form_map = load_mapping("literary_form")
@@ -204,10 +206,18 @@ def classify_work(self, work: dict) -> dict:
204206

205207
# Resolve literary_form conflicts
206208
# Fiction wins unless strong Nonfiction-specific signals are present
207-
if "Fiction" in result["literary_form"] and "Nonfiction" in result["literary_form"]:
209+
if (
210+
"Fiction" in result["literary_form"]
211+
and "Nonfiction" in result["literary_form"]
212+
):
208213
strong_nonfiction = {
209-
'biography', 'biographies', 'autobiography', 'autobiographies',
210-
'memoir', 'memoirs', 'juvenile nonfiction'
214+
"biography",
215+
"biographies",
216+
"autobiography",
217+
"autobiographies",
218+
"memoir",
219+
"memoirs",
220+
"juvenile nonfiction",
211221
}
212222
subjects_lower = {s.lower().strip() for s in work.get("subjects", [])}
213223
if subjects_lower & strong_nonfiction:
@@ -242,6 +252,7 @@ def classify_work(self, work: dict) -> dict:
242252
# Fetching
243253
# ---------------------------------------------------------------------------
244254

255+
245256
def fetch_work(work_id: str) -> dict:
246257
"""Fetch a work JSON from Open Library."""
247258
work_id = work_id.replace("/works/", "").strip()
@@ -263,6 +274,7 @@ def load_work_file(path: str) -> dict:
263274
# Output
264275
# ---------------------------------------------------------------------------
265276

277+
266278
def print_result(work_id: str, result: dict):
267279
print(f"\n=== {work_id} ===")
268280
for key, values in result.items():
@@ -284,6 +296,7 @@ def write_result(work_id: str, result: dict, output_dir: str):
284296
# CLI
285297
# ---------------------------------------------------------------------------
286298

299+
287300
def main():
288301
parser = argparse.ArgumentParser(
289302
description="Migrate OL legacy subjects to canonical typed tags."
@@ -293,8 +306,12 @@ def main():
293306
group.add_argument("--file", help="Path to a local work JSON file")
294307
group.add_argument("--batch", help="Path to newline-delimited OL Work IDs file")
295308

296-
parser.add_argument("--output", default="output", help="Output directory for batch mode")
297-
parser.add_argument("--dry-run", action="store_true", help="Print results, don't write files")
309+
parser.add_argument(
310+
"--output", default="output", help="Output directory for batch mode"
311+
)
312+
parser.add_argument(
313+
"--dry-run", action="store_true", help="Print results, don't write files"
314+
)
298315

299316
args = parser.parse_args()
300317
classifier = SubjectClassifier()

0 commit comments

Comments
 (0)