Skip to content

Commit 74939ee

Browse files
authored
Merge pull request #446 from legendu-net/dev
Merge dev into main
2 parents 33f18d7 + b56cc6a commit 74939ee

File tree

5 files changed

+1478
-1241
lines changed

5 files changed

+1478
-1241
lines changed

aiutil/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""A utils Python package for data scientists."""
22

3-
__version__ = "0.87.1"
3+
__version__ = "0.89.0"

aiutil/filesystem.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -718,3 +718,25 @@ def _trace_dir_upwards(path: Path) -> Path:
718718
path = Path(path)
719719
prefix = _trace_dir_upwards(path)
720720
return PosixPathPair(prefix, path.relative_to(prefix))
721+
722+
723+
def normalize_path_name(
724+
path: str | Path, replacements: dict[str, str] | None = None
725+
) -> Path:
726+
"""Normalize the name of a path.
727+
728+
:param path: A path to be normalized.
729+
:param replacements: A mapping of characters to replace.
730+
"""
731+
if isinstance(path, str):
732+
path = Path(path)
733+
name = path.name
734+
if replacements is None:
735+
replacements = {
736+
" ": "_",
737+
"(": "_",
738+
")": "_",
739+
}
740+
path_new = path.with_name(name.translate(str.maketrans(replacements)))
741+
path.rename(path_new)
742+
return path_new

aiutil/pdf.py

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import datetime
44
from pathlib import Path
55
import re
6+
import time
67
from typing import Iterable
78
from pypdf import PdfWriter, PdfReader
89
import pdfplumber
@@ -77,16 +78,37 @@ def _rename_bellevue_water(path: Path, text_first_page: str) -> Path:
7778
return path_new
7879

7980

80-
def rename(pdf: str | Path) -> Path:
    """Rename a PDF file automatically based on its content.

    The text of the first page decides which renaming helper is applied.
    When no known marker is found, the file is left under its current name.
    The mapping from the old path to the resulting path is printed.

    :param pdf: The path of the PDF file.
    :return: The path of the renamed PDF file
        (the original path if no marker matched).
    """
    source = Path(pdf) if isinstance(pdf, str) else pdf
    first_page = extract_text_first_page(source)
    if "Puget Sound Energy" in first_page:
        target = _rename_puget_sound_energy(source, first_page)
    elif "MyUtilityBill.bellevuewa.gov" in first_page:
        target = _rename_bellevue_water(source, first_page)
    else:
        # Unrecognized document: keep the current path.
        target = source
    print(f"{source} ==> {target}")
    return target
97+
98+
99+
def rename_dir(
100+
dir_: str | Path, seconds_wait: float = 0.1, seconds_total: float = 3600
101+
):
102+
if isinstance(dir_, str):
103+
dir_ = Path(dir_)
104+
processed = set()
105+
time_begin = time.time()
106+
while True:
107+
if time.time() - time_begin > seconds_total:
108+
break
109+
time.sleep(seconds_wait)
110+
for path in dir_.iterdir():
111+
if path in processed:
112+
continue
113+
if path.suffix.lower() == ".pdf":
114+
processed.add(rename(path))

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "aiutil"
3-
version = "0.87.1"
3+
version = "0.89.0"
44
description = "A utils Python package for data scientists."
55
authors = [{ name = "Benjamin Du", email = "[email protected]" }]
66
requires-python = ">=3.10,<3.14"

0 commit comments

Comments
 (0)