Skip to content

Commit f84f957

Browse files
redjax and actions-user authored
Dev (#339)
* Fix red_utils.ext imports for time_utils, detect either arrow or pend… (#327) Fix red_utils.ext imports for time_utils, detect either arrow or pendulum * Repo/updates (#328) * Update pre-commit, specify the ruff.ci.toml file * docs: Update repo README. Add docs/ dir for extended README files * Auto-export requirements files * Update developer docs with local testing notes * Update pre-commit * another pre-commit fix --------- Co-authored-by: GitHub Action <[email protected]> * Feat/dataframe utils (#330) * Add/modify dataframe utils, specifically Pandas * Update tests. Rename any references to file_utils -> path_utils * Feat/dataframe utils (#331) * Add/modify dataframe utils, specifically Pandas * Update tests. Rename any references to file_utils -> path_utils * Auto-export requirements files --------- Co-authored-by: GitHub Action <[email protected]> * Fix tests, update .gitignore (#332) * Fix tests, update .gitignore * Auto-export requirements files --------- Co-authored-by: GitHub Action <[email protected]> * Feat/pytests (#333) * Add tests dir to vscode workspace * Fix diskcache utils * Add dict tests * Add hash util tests * Add uuid_util tests * release(v0.2.14): Add dataframe utils. Update tests. * Auto-export requirements files * Fix/path utils (#338) * Dev (#337) * Fix red_utils.ext imports for time_utils, detect either arrow or pend… (#327) Fix red_utils.ext imports for time_utils, detect either arrow or pendulum * Repo/updates (#328) * Update pre-commit, specify the ruff.ci.toml file * docs: Update repo README. Add docs/ dir for extended README files * Auto-export requirements files * Update developer docs with local testing notes * Update pre-commit * another pre-commit fix --------- Co-authored-by: GitHub Action <[email protected]> * Feat/dataframe utils (#330) * Add/modify dataframe utils, specifically Pandas * Update tests. 
Rename any references to file_utils -> path_utils * Feat/dataframe utils (#331) * Add/modify dataframe utils, specifically Pandas * Update tests. Rename any references to file_utils -> path_utils * Auto-export requirements files --------- Co-authored-by: GitHub Action <[email protected]> * Fix tests, update .gitignore (#332) * Fix tests, update .gitignore * Auto-export requirements files --------- Co-authored-by: GitHub Action <[email protected]> * Feat/pytests (#333) * Add tests dir to vscode workspace * Fix diskcache utils * Add dict tests * Add hash util tests * Add uuid_util tests * release(v0.2.14): Add dataframe utils. Update tests. --------- Co-authored-by: GitHub Action <[email protected]> * Update crawl_dir function * Fix issue with search_str var in crawl_dir._crawl() * Auto-export requirements files --------- Co-authored-by: GitHub Action <[email protected]> --------- Co-authored-by: GitHub Action <[email protected]>
1 parent eca2b3e commit f84f957

File tree

7 files changed

+132
-26
lines changed

7 files changed

+132
-26
lines changed

red_utils/std/path_utils/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from . import constants, operations
4+
from .constants import VALID_RETURN_TYPES
45
from .operations import (
56
crawl_dir,
67
delete_path,

red_utils/std/path_utils/constants.py

+2
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
from __future__ import annotations
2+
3+
VALID_RETURN_TYPES: list[str] = ["all", "files", "dirs"]

red_utils/std/path_utils/operations.py

+101-3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
from red_utils.core.constants import JSON_DIR
1111

12+
from .constants import VALID_RETURN_TYPES
13+
1214
def file_ts(fmt: str = "%Y-%m-%d_%H:%M:%S") -> str:
1315
"""Return a formatted timestamp, useful for prepending to dir/file names."""
1416
now: str = datetime.now().strftime(fmt)
@@ -65,6 +67,102 @@ def export_json(
6567

6668

6769
def crawl_dir(
    target: Union[str, Path] = None,
    filetype_filter: str | None = None,
    return_type: str = "all",
) -> Union[dict[str, list[Path]], list[Path]]:
    """Recursively crawl a directory, collecting the files and/or dirs found.

    Params:
        target (str | Path): Directory to scan. Converted to a Path if it is
            not one already.
        filetype_filter (str | None): Optional file extension to match, with or
            without a leading "." (e.g. "py" or ".py"). When set, the glob
            pattern becomes "**/*<ext>"; otherwise everything ("**/*") matches.
            Note the pattern is applied to every entry, so directories are
            only reported if their name also ends with the extension.
        return_type (str): One of VALID_RETURN_TYPES ("all", "files", "dirs").
            Matched case-insensitively, as in crawl_dir_old.

    Returns:
        (dict[str, list[Path]]): For "all", a dict with "files" and "dirs" keys.
        (list[Path]): For "files" or "dirs", only the matching list.

    Raises:
        TypeError: filetype_filter or return_type is not a str.
        ValueError: target or return_type is None, or return_type is not one
            of VALID_RETURN_TYPES.
        FileNotFoundError: target does not exist.
    """
    ## Build the glob pattern. The filter is validated before the target so the
    #  order in which errors surface is the same as it has always been.
    if filetype_filter:
        if not isinstance(filetype_filter, str):
            raise TypeError(
                f"Invalid type for filetype_filter: ({type(filetype_filter)}). Must be of type str"
            )
        if not filetype_filter.startswith("."):
            filetype_filter = f".{filetype_filter}"

        search_str: str = f"**/*{filetype_filter}"
    else:
        search_str = "**/*"

    ## Validate the target directory
    if target is None:
        raise ValueError("Missing a target directory to scan")
    if not isinstance(target, Path):
        # Accept plain strings (and any other path-like object) by converting
        target = Path(target)
    if not target.exists():
        raise FileNotFoundError(f"Could not find directory: {target}")

    ## Validate the return type
    if return_type is None:
        raise ValueError("Missing return type")
    if not isinstance(return_type, str):
        raise TypeError(
            f"Invalid type for return_type: ({type(return_type)}). Must be one of {VALID_RETURN_TYPES}"
        )
    # Normalize case so "ALL"/"Files" behave like "all"/"files"
    normalized_type: str = return_type.lower()
    if normalized_type not in VALID_RETURN_TYPES:
        raise ValueError(
            f"Invalid return type: {return_type}. Must be one of: {VALID_RETURN_TYPES}"
        )

    ## Only collect what the caller asked for
    want_files: bool = normalized_type in ("all", "files")
    want_dirs: bool = normalized_type in ("all", "dirs")

    files: list[Path] = []
    dirs: list[Path] = []

    for entry in target.glob(search_str):
        if entry.is_file():
            if want_files:
                files.append(entry)
        elif want_dirs:
            # Anything that is not a regular file is reported as a dir
            dirs.append(entry)

    if normalized_type == "files":
        return files
    if normalized_type == "dirs":
        return dirs

    return {"files": files, "dirs": dirs}
147+
148+
# return_obj: dict[str, list[Path]] = {"files": [], "dirs": []}
149+
150+
# for i in target.glob(search_str):
151+
# if i.is_file():
152+
# return_obj["files"].append(i)
153+
# else:
154+
# return_obj["dirs"].append(i)
155+
156+
# match return_type:
157+
# case "all":
158+
# return return_obj
159+
# case "files":
160+
# return return_obj["files"]
161+
# case "dirs":
162+
# return return_obj["dirs"]
163+
164+
165+
def crawl_dir_old(
68166
in_dir: Union[str, Path] = None,
69167
return_type: str = "all",
70168
ext_filter: str | None = None,
@@ -88,7 +186,7 @@ def crawl_dir(
88186
found in path, including in subdirectories. return_obj['dirs'] will be a list of
89187
dirs and subdirs found during crawl.
90188
"""
91-
valid_return_types: list[str] = ["all", "files", "dirs"]
189+
VALID_RETURN_TYPES: list[str] = ["all", "files", "dirs"]
92190
if not return_type:
93191
return_type = "all"
94192

@@ -97,9 +195,9 @@ def crawl_dir(
97195

98196
return_type = return_type.lower()
99197

100-
if return_type not in valid_return_types:
198+
if return_type not in VALID_RETURN_TYPES:
101199
raise ValueError(
102-
f"Invalid return type: {return_type}. Must be one of {valid_return_types}"
200+
f"Invalid return type: {return_type}. Must be one of {VALID_RETURN_TYPES}"
103201
)
104202

105203
if ext_filter is not None:

requirements.ci.txt

+7-5
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,12 @@ cfgv==3.4.0
1414
charset-normalizer==3.3.2
1515
click==8.1.7
1616
colorama==0.4.6; sys_platform == "win32" or platform_system == "Windows"
17+
dep-logic==0.0.4
1718
diskcache==5.6.3
1819
distlib==0.3.8
1920
filelock==3.13.1
2021
findpython==0.4.1
21-
frozenlist==1.4.0; sys_platform != "win32" or implementation_name != "pypy" and extra == "d"
22+
frozenlist==1.4.1; sys_platform != "win32" or implementation_name != "pypy" and extra == "d"
2223
h11==0.14.0
2324
httpcore==1.0.2
2425
httpx==0.25.2
@@ -35,9 +36,9 @@ mypy-extensions==1.0.0
3536
nodeenv==1.8.0
3637
packaging==23.2
3738
pathspec==0.12.1
38-
pdm==2.10.4
39+
pdm==2.11.1
3940
pdm-bump==0.7.3
40-
pendulum==2.1.2
41+
pendulum==3.0.0
4142
platformdirs==4.1.0
4243
pluggy==1.3.0
4344
pre-commit==3.6.0
@@ -47,20 +48,21 @@ pyproject-metadata==0.7.1
4748
pytest==7.4.3
4849
python-dateutil==2.8.2
4950
python-dotenv==1.0.0
50-
pytzdata==2020.1
5151
pyyaml==6.0.1
5252
requests==2.31.0
5353
requests-toolbelt==1.0.0
5454
resolvelib==1.0.1
5555
rich==13.7.0
56-
ruff==0.1.7
56+
ruff==0.1.8
5757
setuptools==69.0.2
5858
shellingham==1.5.4
5959
six==1.16.0
6060
sniffio==1.3.0
61+
time-machine==2.13.0; implementation_name != "pypy"
6162
tomli-w==1.0.0
6263
tomlkit==0.12.3
6364
truststore==0.8.0; python_version >= "3.10"
65+
tzdata==2023.3
6466
unearth==0.12.1
6567
urllib3==2.1.0
6668
virtualenv==20.25.0

requirements.dev.txt

+7-5
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,12 @@ cfgv==3.4.0
1414
charset-normalizer==3.3.2
1515
click==8.1.7
1616
colorama==0.4.6; sys_platform == "win32" or platform_system == "Windows"
17+
dep-logic==0.0.4
1718
diskcache==5.6.3
1819
distlib==0.3.8
1920
filelock==3.13.1
2021
findpython==0.4.1
21-
frozenlist==1.4.0; sys_platform != "win32" or implementation_name != "pypy" and extra == "d"
22+
frozenlist==1.4.1; sys_platform != "win32" or implementation_name != "pypy" and extra == "d"
2223
h11==0.14.0
2324
httpcore==1.0.2
2425
httpx==0.25.2
@@ -35,9 +36,9 @@ mypy-extensions==1.0.0
3536
nodeenv==1.8.0
3637
packaging==23.2
3738
pathspec==0.12.1
38-
pdm==2.10.4
39+
pdm==2.11.1
3940
pdm-bump==0.7.3
40-
pendulum==2.1.2
41+
pendulum==3.0.0
4142
platformdirs==4.1.0
4243
pluggy==1.3.0
4344
pre-commit==3.6.0
@@ -47,20 +48,21 @@ pyproject-metadata==0.7.1
4748
pytest==7.4.3
4849
python-dateutil==2.8.2
4950
python-dotenv==1.0.0
50-
pytzdata==2020.1
5151
pyyaml==6.0.1
5252
requests==2.31.0
5353
requests-toolbelt==1.0.0
5454
resolvelib==1.0.1
5555
rich==13.7.0
56-
ruff==0.1.7
56+
ruff==0.1.8
5757
setuptools==69.0.2
5858
shellingham==1.5.4
5959
six==1.16.0
6060
sniffio==1.3.0
61+
time-machine==2.13.0; implementation_name != "pypy"
6162
tomli-w==1.0.0
6263
tomlkit==0.12.3
6364
truststore==0.8.0; python_version >= "3.10"
65+
tzdata==2023.3
6466
unearth==0.12.1
6567
urllib3==2.1.0
6668
virtualenv==20.25.0

requirements.txt

+3-2
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@ loguru==0.7.2
1313
markdown-it-py==3.0.0
1414
mdurl==0.1.2
1515
msgpack==1.0.7
16-
pendulum==2.1.2
16+
pendulum==3.0.0
1717
pygments==2.17.2
1818
python-dateutil==2.8.2
19-
pytzdata==2020.1
2019
rich==13.7.0
2120
six==1.16.0
2221
sniffio==1.3.0
22+
time-machine==2.13.0; implementation_name != "pypy"
23+
tzdata==2023.3
2324
win32-setctime==1.1.0; sys_platform == "win32"

tests/std_tests/path_util_tests/expect_pass_tests.py

+11-11
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def test_crawl_all(cwd: Path):
3535
assert cwd is not None, "CWD cannot be None."
3636
assert isinstance(cwd, Path), f"CWD must be of type Path, not ({type(cwd)})"
3737

38-
all_crawl: dict[str, list[Path]] = path_utils.crawl_dir(in_dir=cwd)
38+
all_crawl: dict[str, list[Path]] = path_utils.crawl_dir(target=cwd)
3939

4040
assert isinstance(
4141
all_crawl, dict
@@ -49,12 +49,12 @@ def test_crawl_files(cwd: Path):
4949
assert isinstance(cwd, Path), f"CWD must be of type Path, not ({type(cwd)})"
5050

5151
file_crawl: dict[str, list[Path]] = path_utils.crawl_dir(
52-
in_dir=cwd, return_type="files"
52+
target=cwd, return_type="files"
5353
)
5454

5555
assert isinstance(
56-
file_crawl, dict
57-
), f"File crawl response should be a dict, not ({type(file_crawl)})"
56+
file_crawl, list
57+
), f"File crawl response should be a list, not ({type(file_crawl)})"
5858

5959

6060
@mark.file_utils
@@ -64,12 +64,12 @@ def test_crawl_dirs(cwd: Path):
6464
assert isinstance(cwd, Path), f"CWD must be of type Path, not ({type(cwd)})"
6565

6666
dir_crawl: dict[str, list[Path]] = path_utils.crawl_dir(
67-
in_dir=cwd, return_type="dirs"
67+
target=cwd, return_type="dirs"
6868
)
6969

7070
assert isinstance(
71-
dir_crawl, dict
72-
), f"Dir crawl response should be a dict, not ({type(dir_crawl)})"
71+
dir_crawl, list
72+
), f"Dir crawl response should be a list, not ({type(dir_crawl)})"
7373

7474

7575
@mark.file_utils
@@ -78,14 +78,14 @@ def test_crawl_dir_for_py_filetype(cwd: Path):
7878
assert isinstance(cwd, Path), f"CWD must be of type Path, not ({type(cwd)})"
7979

8080
py_crawl: dict[str, list[Path]] = path_utils.crawl_dir(
81-
in_dir=cwd, return_type="files", ext_filter=".py"
81+
target=cwd, return_type="files", filetype_filter=".py"
8282
)
8383

8484
assert isinstance(
85-
py_crawl, dict
86-
), f".py file crawl response should be a dict, not ({type(py_crawl)})"
85+
py_crawl, list
86+
), f".py file crawl response should be a list, not ({type(py_crawl)})"
8787

88-
for f in py_crawl["files"]:
88+
for f in py_crawl:
8989
assert f.is_file(), f"File should have been a file"
9090
assert f.suffix == ".py", f"Filetype should have been .py, not {f.suffix}"
9191

0 commit comments

Comments
 (0)