Skip to content

Commit 135a65d

Browse files
saskra and jwmueller authored
Add optional verbosity argument to suppress prints (#258)
Co-authored-by: Jonas Mueller <[email protected]>
1 parent d6f7384 commit 135a65d

File tree

7 files changed

+31 additions, -7 deletions (lines changed)

7 files changed

+31 additions, -7 deletions (lines changed)

src/cleanvision/dataset/fsspec_dataset.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ def __init__(
1919
data_folder: Optional[str] = None,
2020
filepaths: Optional[List[str]] = None,
2121
storage_opts: Dict[str, str] = {},
22+
verbose: bool = True,
2223
) -> None:
2324
super().__init__()
2425
self.storage_opts = storage_opts
@@ -32,7 +33,7 @@ def __init__(
3233
self.fs, dataset_path = fsspec.core.url_to_fs(
3334
data_folder, **self.storage_opts
3435
)
35-
self._filepaths = self.__get_filepaths(dataset_path)
36+
self._filepaths = self.__get_filepaths(dataset_path, verbose)
3637
else:
3738
assert filepaths is not None
3839
if len(filepaths) != len(set(filepaths)):
@@ -64,10 +65,11 @@ def get_name(self, item: Union[int, str]) -> str:
6465
assert isinstance(item, str)
6566
return item.split("/")[-1]
6667

67-
def __get_filepaths(self, dataset_path: str) -> List[str]:
68+
def __get_filepaths(self, dataset_path: str, verbose: bool) -> List[str]:
6869
"""See an issue here: https://github.com/fsspec/filesystem_spec/issues/1019
6970
There's a problem with proper patterning on /**/ in fsspec"""
70-
print(f"Reading images from {dataset_path}")
71+
if verbose:
72+
print(f"Reading images from {dataset_path}")
7173
filepaths = []
7274
for ext in IMAGE_FILE_EXTENSIONS:
7375
# initial *.ext search, top level

src/cleanvision/dataset/utils.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,16 @@ def build_dataset(
1919
image_key: Optional[str] = None,
2020
torchvision_dataset: Optional["VisionDataset"] = None,
2121
storage_opts: Dict[str, str] = {},
22+
verbose: bool = True,
2223
) -> Dataset:
2324
if data_path:
24-
return FSDataset(data_folder=data_path, storage_opts=storage_opts)
25+
return FSDataset(
26+
data_folder=data_path, storage_opts=storage_opts, verbose=verbose
27+
)
2528
elif filepaths:
26-
return FSDataset(filepaths=filepaths, storage_opts=storage_opts)
29+
return FSDataset(
30+
filepaths=filepaths, storage_opts=storage_opts, verbose=verbose
31+
)
2732
elif hf_dataset and image_key:
2833
return HFDataset(hf_dataset, image_key)
2934
elif torchvision_dataset:

src/cleanvision/imagelab.py

+3
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ def __init__(
124124
image_key: Optional[str] = None,
125125
torchvision_dataset: Optional["VisionDataset"] = None,
126126
storage_opts: Dict[str, Any] = {},
127+
verbose: bool = True,
127128
) -> None:
128129
self._dataset = build_dataset(
129130
data_path,
@@ -132,6 +133,7 @@ def __init__(
132133
image_key,
133134
torchvision_dataset,
134135
storage_opts=storage_opts,
136+
verbose=verbose,
135137
)
136138
if len(self._dataset) == 0:
137139
raise ValueError("No images found in the dataset specified")
@@ -276,6 +278,7 @@ def find_issues(
276278
dataset=self._dataset,
277279
imagelab_info=self.info,
278280
n_jobs=n_jobs,
281+
verbose=verbose,
279282
)
280283

281284
# update issues, issue_summary and info

src/cleanvision/issue_managers/duplicate_issue_manager.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ def find_issues(
107107
dataset: Optional[Dataset] = None,
108108
imagelab_info: Optional[Dict[str, Any]] = None,
109109
n_jobs: Optional[int] = None,
110+
verbose: Optional[bool] = None,
110111
**kwargs: Any,
111112
) -> None:
112113
super().find_issues(**kwargs)
@@ -125,7 +126,9 @@ def find_issues(
125126

126127
results: List[Dict[str, Union[str, int]]] = []
127128
if n_jobs == 1:
128-
for idx in tqdm(dataset.index):
129+
for idx in tqdm(
130+
dataset.index, leave=verbose, desc="Computing hashes", smoothing=0
131+
):
129132
results.append(compute_hash(idx, dataset, to_compute, self.params))
130133
else:
131134
args = [
@@ -145,6 +148,9 @@ def find_issues(
145148
compute_hash_wrapper, args, chunksize=chunksize
146149
),
147150
total=len(dataset),
151+
leave=verbose,
152+
desc="Computing hashes",
153+
smoothing=0,
148154
)
149155
)
150156

src/cleanvision/issue_managers/image_property_issue_manager.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ def find_issues(
114114
dataset: Optional[Dataset] = None,
115115
imagelab_info: Optional[Dict[str, Any]] = None,
116116
n_jobs: Optional[int] = None,
117+
verbose: Optional[bool] = None,
117118
**kwargs: Any,
118119
) -> None:
119120
super().find_issues(**kwargs)
@@ -138,7 +139,9 @@ def find_issues(
138139
if to_be_computed:
139140
results: List[Dict[str, Union[int, float, str]]] = []
140141
if n_jobs == 1:
141-
for idx in tqdm(dataset.index):
142+
for idx in tqdm(
143+
dataset.index, leave=verbose, desc="Computing scores", smoothing=0
144+
):
142145
results.append(
143146
compute_scores(
144147
idx, dataset, to_be_computed, self.image_properties
@@ -162,6 +165,9 @@ def find_issues(
162165
compute_scores_wrapper, args, chunksize=chunksize
163166
),
164167
total=len(dataset),
168+
leave=verbose,
169+
desc="Computing scores",
170+
smoothing=0,
165171
)
166172
)
167173

src/cleanvision/utils/base_issue_manager.py

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def check_params(**kwargs: Any) -> None:
3232
"dataset": Dataset,
3333
"imagelab_info": Dict[str, Any],
3434
"n_jobs": int,
35+
"verbose": bool,
3536
}
3637

3738
for name, value in kwargs.items():

src/cleanvision/utils/utils.py

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def get_filepaths(
5151
"""
5252

5353
abs_dir_path = os.path.abspath(os.path.expanduser(dir_path))
54+
# ToDo: Suppress print according to verbosity level
5455
print(f"Reading images from {abs_dir_path}")
5556
filepaths = []
5657
for ext in IMAGE_FILE_EXTENSIONS:

0 commit comments

Comments
 (0)