3535# Load mappings
3636# ---------------------------------------------------------------------------
3737
38+
3839def load_mapping (name : str ) -> dict [str , str ]:
3940 """Load a JSON mapping file from scripts/mappings/."""
4041 path = MAPPINGS_DIR / f"{ name } .json"
@@ -85,6 +86,7 @@ def is_classification_code(s: str) -> bool:
8586# Core classifier
8687# ---------------------------------------------------------------------------
8788
89+
8890class SubjectClassifier :
8991 def __init__ (self ):
9092 self .literary_form_map = load_mapping ("literary_form" )
@@ -204,10 +206,18 @@ def classify_work(self, work: dict) -> dict:
204206
205207 # Resolve literary_form conflicts
206208 # Fiction wins unless strong Nonfiction-specific signals are present
207- if "Fiction" in result ["literary_form" ] and "Nonfiction" in result ["literary_form" ]:
209+ if (
210+ "Fiction" in result ["literary_form" ]
211+ and "Nonfiction" in result ["literary_form" ]
212+ ):
208213 strong_nonfiction = {
209- 'biography' , 'biographies' , 'autobiography' , 'autobiographies' ,
210- 'memoir' , 'memoirs' , 'juvenile nonfiction'
214+ "biography" ,
215+ "biographies" ,
216+ "autobiography" ,
217+ "autobiographies" ,
218+ "memoir" ,
219+ "memoirs" ,
220+ "juvenile nonfiction" ,
211221 }
212222 subjects_lower = {s .lower ().strip () for s in work .get ("subjects" , [])}
213223 if subjects_lower & strong_nonfiction :
@@ -242,6 +252,7 @@ def classify_work(self, work: dict) -> dict:
242252# Fetching
243253# ---------------------------------------------------------------------------
244254
255+
245256def fetch_work (work_id : str ) -> dict :
246257 """Fetch a work JSON from Open Library."""
247258 work_id = work_id .replace ("/works/" , "" ).strip ()
@@ -263,6 +274,7 @@ def load_work_file(path: str) -> dict:
263274# Output
264275# ---------------------------------------------------------------------------
265276
277+
266278def print_result (work_id : str , result : dict ):
267279 print (f"\n === { work_id } ===" )
268280 for key , values in result .items ():
@@ -284,6 +296,7 @@ def write_result(work_id: str, result: dict, output_dir: str):
284296# CLI
285297# ---------------------------------------------------------------------------
286298
299+
287300def main ():
288301 parser = argparse .ArgumentParser (
289302 description = "Migrate OL legacy subjects to canonical typed tags."
@@ -293,8 +306,12 @@ def main():
293306 group .add_argument ("--file" , help = "Path to a local work JSON file" )
294307 group .add_argument ("--batch" , help = "Path to newline-delimited OL Work IDs file" )
295308
296- parser .add_argument ("--output" , default = "output" , help = "Output directory for batch mode" )
297- parser .add_argument ("--dry-run" , action = "store_true" , help = "Print results, don't write files" )
309+ parser .add_argument (
310+ "--output" , default = "output" , help = "Output directory for batch mode"
311+ )
312+ parser .add_argument (
313+ "--dry-run" , action = "store_true" , help = "Print results, don't write files"
314+ )
298315
299316 args = parser .parse_args ()
300317 classifier = SubjectClassifier ()
0 commit comments