@@ -69,6 +69,11 @@ def build_parser() -> argparse.ArgumentParser:
6969 default = 4 ,
7070 help = "Maximum retries for OpenRouter requests" ,
7171 )
72+ parser .add_argument (
73+ "--skip-refine" ,
74+ action = "store_true" ,
75+ help = "Skip the secondary quality refinement pass." ,
76+ )
7277 return parser
7378
7479
@@ -335,6 +340,7 @@ def run(
335340 max_retries : int ,
336341 timeout : int ,
337342 log_enabled : bool ,
343+ skip_refine : bool ,
338344) -> None :
339345 def emit (message : str ) -> None :
340346 if log_enabled :
@@ -363,6 +369,19 @@ def emit(message: str) -> None:
363369 dropped = 0
364370 filter_model = quality_model or model
365371 for idx , row in enumerate (rows , start = 1 ):
372+ if skip_refine :
373+ if not all (row .get (col , "" ).strip () for col in REQUIRED_COLS ):
374+ dropped += 1
375+ missing = [
376+ col for col in REQUIRED_COLS if not row .get (col , "" ).strip ()
377+ ]
378+ emit (
379+ f"[DROP] Row { idx } : { row .get ('Word' , '' ) or '(unnamed)' } — "
380+ f"missing values for { ', ' .join (missing )} "
381+ )
382+ continue
383+ filtered_rows .append (row )
384+ continue
366385 try :
367386 verdict = quality_filter_row (
368387 row , api_key , filter_model , max_retries = max_retries , timeout = timeout
@@ -444,6 +463,7 @@ def main(argv: List[str] | None = None) -> int:
444463 max_retries = args .max_retries ,
445464 timeout = args .timeout ,
446465 log_enabled = log_enabled ,
466+ skip_refine = args .skip_refine ,
447467 )
448468 return 0
449469 except KeyboardInterrupt : # pragma: no cover - user interrupt
0 commit comments