Skip to content

Commit b607ba2

Browse files
committed
cache processed (renamed) pdfs
1 parent 1fa41f4 commit b607ba2

File tree

2 files changed

+16
-15
lines changed

2 files changed

+16
-15
lines changed

aiutil/pdf.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -62,52 +62,53 @@ def extract_text_first_page(path: str | Path) -> str:
6262
return page.extract_text()
6363

6464

65-
def _rename_puget_sound_energy(
66-
path: Path, dir_dest: Path, text_first_page: str
67-
) -> Path:
65+
def _rename_puget_sound_energy(path: Path, text_first_page: str) -> Path:
6866
m = re.search(r"Issued: (\w+ \d{1,2}, \d{4})", text_first_page)
6967
date = datetime.datetime.strptime(m.group(1), "%B %d, %Y").strftime(FMT)
70-
path_new = dir_dest / f"pse_{date}.pdf"
68+
path_new = path.with_name(f"pse_{date}.pdf")
7169
path.rename(path_new)
7270
return path_new
7371

7472

75-
def _rename_bellevue_water(path: Path, dir_dest: Path, text_first_page: str) -> Path:
73+
def _rename_bellevue_water(path: Path, text_first_page: str) -> Path:
7674
m = re.search(r"Bill Date: (\d{1,2}/\d{1,2}/\d{4})", text_first_page)
7775
date = datetime.datetime.strptime(m.group(1), "%m/%d/%Y").strftime(FMT)
78-
path_new = dir_dest / f"bellevue_water_{date}.pdf"
76+
path_new = path.with_name(f"bellevue_water_{date}.pdf")
7977
path.rename(path_new)
8078
return path_new
8179

8280

83-
def rename(pdf: str | Path, dir_dest: str | Path) -> Path:
81+
def rename(pdf: str | Path) -> Path:
8482
"""Rename a PDF file automatically based on its content.
8583
8684
:param pdf: The path of the PDF file.
8785
:return: The path of the renamed PDF file.
8886
"""
8987
if isinstance(pdf, str):
9088
pdf = Path(pdf)
91-
if isinstance(dir_dest, str):
92-
dir_dest = Path(dir_dest)
9389
text = extract_text_first_page(pdf)
90+
pdf_new = pdf
9491
if "Puget Sound Energy" in text:
95-
return _rename_puget_sound_energy(pdf, dir_dest, text)
96-
if "MyUtilityBill.bellevuewa.gov" in text:
97-
return _rename_bellevue_water(pdf, dir_dest, text)
92+
pdf_new = _rename_puget_sound_energy(pdf, text)
93+
elif "MyUtilityBill.bellevuewa.gov" in text:
94+
pdf_new = _rename_bellevue_water(pdf, text)
95+
print(f"{pdf} ==> {pdf_new}")
96+
return pdf_new
9897

9998

10099
def rename_dir(
101100
dir_: str | Path, seconds_wait: float = 0.1, seconds_total: float = 3600
102101
):
103102
if isinstance(dir_, str):
104103
dir_ = Path(dir_)
105-
dir_dest = dir_ / "_rename"
104+
processed = set()
106105
time_begin = time.time()
107106
while True:
108107
if time.time() - time_begin > seconds_total:
109108
break
110109
time.sleep(seconds_wait)
111110
for path in dir_.iterdir():
111+
if path in processed:
112+
continue
112113
if path.suffix.lower() == ".pdf":
113-
rename(path, dir_dest)
114+
processed.add(rename(path))

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)