@@ -62,52 +62,53 @@ def extract_text_first_page(path: str | Path) -> str:
6262 return page .extract_text ()
6363
6464
def _rename_puget_sound_energy(path: Path, text_first_page: str) -> Path:
    """Rename a Puget Sound Energy bill in place using its issue date.

    :param path: The path of the PDF file to rename.
    :param text_first_page: Text of the first page of the PDF; it is searched
        for a fragment such as ``Issued: January 5, 2024``.
    :return: The new path ``pse_<date>.pdf`` next to the original file, where
        ``<date>`` is the issue date formatted with ``FMT``.
    :raises ValueError: If no issue date is found in ``text_first_page``.
    """
    m = re.search(r"Issued: (\w+ \d{1,2}, \d{4})", text_first_page)
    if m is None:
        # Without this guard a bill with an unexpected layout fails with an
        # opaque AttributeError on ``None.group`` instead of a clear message.
        raise ValueError(f"cannot find the issue date in {path}")
    date = datetime.datetime.strptime(m.group(1), "%B %d, %Y").strftime(FMT)
    # with_name keeps the file in its current directory and only swaps the name.
    path_new = path.with_name(f"pse_{date}.pdf")
    path.rename(path_new)
    return path_new
7371
7472
def _rename_bellevue_water(path: Path, text_first_page: str) -> Path:
    """Rename a Bellevue water bill in place using its bill date.

    :param path: The path of the PDF file to rename.
    :param text_first_page: Text of the first page of the PDF; it is searched
        for a fragment such as ``Bill Date: 1/5/2024``.
    :return: The new path ``bellevue_water_<date>.pdf`` next to the original
        file, where ``<date>`` is the bill date formatted with ``FMT``.
    :raises ValueError: If no bill date is found in ``text_first_page``.
    """
    m = re.search(r"Bill Date: (\d{1,2}/\d{1,2}/\d{4})", text_first_page)
    if m is None:
        # Without this guard a bill with an unexpected layout fails with an
        # opaque AttributeError on ``None.group`` instead of a clear message.
        raise ValueError(f"cannot find the bill date in {path}")
    date = datetime.datetime.strptime(m.group(1), "%m/%d/%Y").strftime(FMT)
    # with_name keeps the file in its current directory and only swaps the name.
    path_new = path.with_name(f"bellevue_water_{date}.pdf")
    path.rename(path_new)
    return path_new
8179
8280
def rename(pdf: str | Path) -> Path:
    """Rename a PDF file in place, choosing the new name from its content.

    :param pdf: The path of the PDF file.
    :return: The path of the renamed PDF file, or the original path when the
        first page matches none of the known billers.
    """
    pdf = Path(pdf) if isinstance(pdf, str) else pdf
    text = extract_text_first_page(pdf)
    # Dispatch on a marker string that identifies the biller.
    if "Puget Sound Energy" in text:
        pdf_new = _rename_puget_sound_energy(pdf, text)
    elif "MyUtilityBill.bellevuewa.gov" in text:
        pdf_new = _rename_bellevue_water(pdf, text)
    else:
        # Unrecognised document: leave the file untouched.
        pdf_new = pdf
    print(f"{pdf} ==> {pdf_new}")
    return pdf_new
9897
9998
10099def rename_dir (
101100 dir_ : str | Path , seconds_wait : float = 0.1 , seconds_total : float = 3600
102101):
103102 if isinstance (dir_ , str ):
104103 dir_ = Path (dir_ )
105- dir_dest = dir_ / "_rename"
104+ processed = set ()
106105 time_begin = time .time ()
107106 while True :
108107 if time .time () - time_begin > seconds_total :
109108 break
110109 time .sleep (seconds_wait )
111110 for path in dir_ .iterdir ():
111+ if path in processed :
112+ continue
112113 if path .suffix .lower () == ".pdf" :
113- rename (path , dir_dest )
114+ processed . add ( rename (path ) )
0 commit comments