Skip to content

Commit 9ed7c86

Browse files
authored
Merge pull request #241 from Living-with-machines/update_file_saving
Update file saving - save as csv files
2 parents: 445618e + e375dc5 — commit 9ed7c86

File tree

17 files changed

+152
-101
lines changed

17 files changed

+152
-101
lines changed

docs/source/Worked-examples/mnist_pipeline.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1237,7 +1237,7 @@
12371237
"metadata": {},
12381238
"outputs": [],
12391239
"source": [
1240-
"predictions_df.to_csv(\"./predictions_df.csv\", sep=\"\\t\", index_label=\"image_id\")"
1240+
"predictions_df.to_csv(\"./predictions_df.csv\", sep=\",\", index_label=\"image_id\")"
12411241
]
12421242
}
12431243
],

docs/source/Worked-examples/plant_pipeline.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2296,7 +2296,7 @@
22962296
"metadata": {},
22972297
"outputs": [],
22982298
"source": [
2299-
"predictions_df.to_csv(\"./predictions_df.csv\", sep=\"\\t\", index_label=\"image_id\")"
2299+
"predictions_df.to_csv(\"./predictions_df.csv\", sep=\",\", index_label=\"image_id\")"
23002300
]
23012301
},
23022302
{

mapreader/annotate/utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,7 @@ def prepare_annotation(
529529
metadata=annot_file,
530530
index_col=0,
531531
ignore_mismatch=True,
532-
delimiter="\t",
532+
delimiter=",",
533533
tree_level=tree_level,
534534
)
535535

@@ -584,7 +584,7 @@ def prepare_annotation(
584584
metadata=annot_file,
585585
index_col=0,
586586
ignore_mismatch=True,
587-
delimiter="\t",
587+
delimiter=",",
588588
tree_level=tree_level,
589589
)
590590
# convert images to dataframe
@@ -659,7 +659,7 @@ def save_annotation(
659659

660660
# Read an existing annotation file (for the same task and userID)
661661
try:
662-
image_df = pd.read_csv(annot_file, sep="\t", index_col=0)
662+
image_df = pd.read_csv(annot_file, index_col=0)
663663
except:
664664
image_df = pd.DataFrame(columns=["image_id", "image_path", "label"])
665665

@@ -684,7 +684,7 @@ def save_annotation(
684684

685685
if len(image_df) > 0:
686686
#image_df = image_df.set_index("image_id")
687-
image_df.to_csv(annot_file, mode="w", sep="\t")
687+
image_df.to_csv(annot_file, mode="w")
688688
print(f"[INFO] Save {newly_annotated} new annotations to {annot_file}")
689689
print(f"[INFO] {new_labels} labels were not already stored")
690690
print(f"[INFO] Total number of saved annotations: {len(image_df)}")

mapreader/classify/datasets.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def __init__(
2828
self,
2929
patch_df: Union[pd.DataFrame, str],
3030
transform: Union[str, transforms.Compose, Callable],
31-
delimiter: str = "\t",
31+
delimiter: str = ",",
3232
patch_paths_col: Optional[str] = "image_path",
3333
label_col: Optional[str] = None,
3434
label_index_col: Optional[str] = None,
@@ -47,7 +47,7 @@ def __init__(
4747
and performs image transformations can be used.
4848
At minimum, transform should be ``torchvision.transforms.ToTensor()``.
4949
delimiter : str, optional
50-
The delimiter to use when reading the dataframe. By default ``"\t"``.
50+
The delimiter to use when reading the dataframe. By default ``","``.
5151
patch_paths_col : str, optional
5252
The name of the column in the DataFrame containing the image paths. Default is "image_path".
5353
label_col : str, optional
@@ -329,7 +329,7 @@ def __init__(
329329
patch_df: Union[pd.DataFrame, str],
330330
transform1: str,
331331
transform2: str,
332-
delimiter: str = "\t",
332+
delimiter: str = ",",
333333
patch_paths_col: Optional[str] = "image_path",
334334
label_col: Optional[str] = None,
335335
label_index_col: Optional[str] = None,
@@ -356,7 +356,7 @@ def __init__(
356356
Torchvision transform to be applied to target images.
357357
Either "train" or "val".
358358
delimiter : str
359-
The delimiter to use when reading the csv file. By default ``"\t"``.
359+
The delimiter to use when reading the csv file. By default ``","``.
360360
patch_paths_col : str, optional
361361
The name of the column in the DataFrame containing the image paths. Default is "image_path".
362362
label_col : str, optional

mapreader/classify/load_annotations.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def __init__(self):
3232
def load(
3333
self,
3434
annotations: Union[str, pd.DataFrame],
35-
delimiter: Optional[str] = "\t",
35+
delimiter: Optional[str] =",",
3636
id_col: Optional[str] = "image_id",
3737
patch_paths_col: Optional[str] = "image_path",
3838
label_col: Optional[str] = "label",
@@ -48,7 +48,7 @@ def load(
4848
The annotations.
4949
Can either be the path to a csv file or a pandas.DataFrame.
5050
delimiter : Optional[str], optional
51-
The delimiter to use when loading the csv file as a dataframe, by default "\t".
51+
The delimiter to use when loading the csv file as a dataframe, by default ",".
5252
id_col : Optional[str], optional
5353
The name of the column which contains the image IDs, by default "image_id".
5454
patch_paths_col : Optional[str], optional
@@ -123,7 +123,7 @@ def load(
123123
def _load_annotations_csv(
124124
self,
125125
annotations: str,
126-
delimiter: Optional[str] = "\t",
126+
delimiter: Optional[str] = ",",
127127
scramble_frame: Optional[bool] = False,
128128
reset_index: Optional[bool] = False,
129129
) -> pd.DataFrame:
@@ -134,7 +134,7 @@ def _load_annotations_csv(
134134
annotations : str
135135
The path to the annotations csv file.
136136
delimiter : Optional[str], optional
137-
The delimiter to use when loading the csv file as a dataframe, by default "\t".
137+
The delimiter to use when loading the csv file as a dataframe, by default ",".
138138
scramble_frame : Optional[bool], optional
139139
Whether to shuffle the rows of the dataframe, by default False.
140140
reset_index : Optional[bool], optional
@@ -488,8 +488,8 @@ def create_datasets(
488488
assert len(self.annotations) == len(df_train) + len(df_val) + len(df_test)
489489

490490
else:
491-
df_val = labels_temp
492-
df_test = None
491+
df_val = df_temp
492+
df_test = pd.DataFrame(columns=self.annotations.columns)
493493
assert len(self.annotations) == len(df_train) + len(df_val)
494494

495495
train_dataset = PatchDataset(
@@ -506,13 +506,12 @@ def create_datasets(
506506
label_col=self.label_col,
507507
label_index_col="label_index",
508508
)
509-
if df_test is not None:
510-
test_dataset = PatchDataset(
511-
df_test,
512-
test_transform,
513-
patch_paths_col=self.patch_paths_col,
514-
label_col=self.label_col,
515-
label_index_col="label_index",
509+
test_dataset = PatchDataset(
510+
df_test,
511+
test_transform,
512+
patch_paths_col=self.patch_paths_col,
513+
label_col=self.label_col,
514+
label_index_col="label_index",
516515
)
517516

518517
datasets = {"train": train_dataset, "val": val_dataset, "test": test_dataset}

mapreader/download/sheet_downloader.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -555,13 +555,13 @@ def _save_metadata(
555555
).T
556556

557557
if os.path.exists(out_filepath):
558-
existing_metadata_df = pd.read_csv(out_filepath, sep="\t", index_col=0)
558+
existing_metadata_df = pd.read_csv(out_filepath, sep=",", index_col=0)
559559
metadata_df = pd.concat([existing_metadata_df, new_metadata_df], ignore_index=True)
560560
metadata_df.drop_duplicates(subset=["grid_bb"], keep="first", inplace=True)
561561
else:
562562
metadata_df = new_metadata_df
563563

564-
metadata_df.to_csv(out_filepath, sep="\t")
564+
metadata_df.to_csv(out_filepath, sep=",")
565565

566566
def _download_map_sheets(self, features: list, path_save: Optional[str] = "maps", metadata_fname: Optional[str] = "metadata.csv", overwrite: Optional[bool] = False):
567567
"""Download map sheets from a list of features.

mapreader/load/images.py

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ def add_metadata(
260260
self,
261261
metadata: Union[str, pd.DataFrame],
262262
index_col: Optional[Union[int, str]] = 0,
263-
delimiter: Optional[str] = "\t",
263+
delimiter: Optional[str] = ",",
264264
columns: Optional[List[str]] = None,
265265
tree_level: Optional[str] = "parent",
266266
ignore_mismatch: Optional[bool] = False,
@@ -271,7 +271,7 @@ def add_metadata(
271271
Parameters
272272
----------
273273
metadata : str or pandas.DataFrame
274-
Path to a ``csv``, ``xls`` or ``xlsx`` file or a pandas DataFrame that contains the metadata information.
274+
Path to a ``csv`` (or similar), ``xls`` or ``xlsx`` file or a pandas DataFrame that contains the metadata information.
275275
index_col : int or str, optional
276276
Column to use as the index when reading the file and converting into a panda.DataFrame.
277277
Accepts column indices or column names.
@@ -280,7 +280,7 @@ def add_metadata(
280280
Only used if a file path is provided as the ``metadata`` parameter.
281281
Ignored if ``columns`` parameter is passed.
282282
delimiter : str, optional
283-
Delimiter used in the ``csv`` file, by default ``"\t"``.
283+
Delimiter used in the ``csv`` file, by default ``","``.
284284
285285
Only used if a ``csv`` file path is provided as
286286
the ``metadata`` parameter.
@@ -323,31 +323,32 @@ def add_metadata(
323323

324324
else: #if not df
325325
if os.path.isfile(metadata):
326-
if metadata.endswith('csv'):
326+
if metadata.endswith(('xls', 'xlsx')):
327327
if columns:
328-
metadata_df = pd.read_csv(
329-
metadata, usecols=columns, delimiter=delimiter
328+
metadata_df = pd.read_excel(
329+
metadata, usecols=columns,
330330
)
331331
else:
332-
metadata_df = pd.read_csv(
333-
metadata, index_col=index_col, delimiter=delimiter
332+
metadata_df = pd.read_excel(
333+
metadata, index_col=index_col,
334334
)
335335
columns=list(metadata_df.columns)
336-
337-
elif metadata.endswith(('xls', 'xlsx')):
336+
337+
elif metadata.endswith('sv'): #csv, tsv, etc
338338
if columns:
339-
metadata_df = pd.read_excel(
340-
metadata, usecols=columns,
339+
metadata_df = pd.read_csv(
340+
metadata, usecols=columns, delimiter=delimiter
341341
)
342342
else:
343-
metadata_df = pd.read_excel(
344-
metadata, index_col=index_col,
343+
metadata_df = pd.read_csv(
344+
metadata, index_col=index_col, delimiter=delimiter
345345
)
346346
columns=list(metadata_df.columns)
347+
347348

348349
else:
349350
raise ValueError(
350-
"[ERROR] ``metadata`` should either be the path to a ``csv``, ``xls`` or ``xlsx`` file or a pandas DataFrame." # noqa
351+
"[ERROR] ``metadata`` should either be the path to a ``csv`` (or similar), ``xls`` or ``xlsx`` file or a pandas DataFrame." # noqa
351352
)
352353

353354
# identify image_id column
@@ -1225,7 +1226,12 @@ def calc_pixel_stats(
12251226
# Calculate std pixel values
12261227
self.patches[patch][f"std_pixel_{band}"] = img_std[i] / 255
12271228

1228-
def convert_images(self, save: Optional[bool] = False, save_format: Optional[str] ="csv") -> Tuple[pd.DataFrame, pd.DataFrame]:
1229+
def convert_images(
1230+
self,
1231+
save: Optional[bool] = False,
1232+
save_format: Optional[str] ="csv",
1233+
delimiter: Optional[str]=",",
1234+
) -> Tuple[pd.DataFrame, pd.DataFrame]:
12291235
"""
12301236
Convert the ``MapImages`` instance's ``images`` dictionary into pandas
12311237
DataFrames for easy manipulation.
@@ -1239,6 +1245,8 @@ def convert_images(self, save: Optional[bool] = False, save_format: Optional[str
12391245
If ``save = True``, the file format to use when saving the dataframes.
12401246
Options of csv ("csv") or excel ("excel" or "xlsx").
12411247
By default, "csv".
1248+
delimiter : str, optional
1249+
The delimiter to use when saving the dataframe. By default ``","``.
12421250
12431251
Returns
12441252
-------
@@ -1255,9 +1263,9 @@ def convert_images(self, save: Optional[bool] = False, save_format: Optional[str
12551263
if save:
12561264

12571265
if save_format == "csv":
1258-
parent_df.to_csv("parent_df.csv", sep="\t")
1266+
parent_df.to_csv("parent_df.csv", sep=delimiter)
12591267
print('[INFO] Saved parent dataframe as "parent_df.csv"')
1260-
patch_df.to_csv("patch_df.csv", sep="\t")
1268+
patch_df.to_csv("patch_df.csv", sep=delimiter)
12611269
print('[INFO] Saved patch dataframe as "patch_df.csv"')
12621270
elif save_format in ["excel", "xlsx"]:
12631271
parent_df.to_excel("parent_df.xlsx")
@@ -1872,6 +1880,7 @@ def load_csv(
18721880
clear_images: Optional[bool] = False,
18731881
index_col_patch: Optional[int] = 0,
18741882
index_col_parent: Optional[int] = 0,
1883+
delimiter: Optional[str] = ",",
18751884
) -> None:
18761885
"""
18771886
Load CSV files containing information about parent and patches,
@@ -1891,6 +1900,8 @@ def load_csv(
18911900
Column to set as index for the patch DataFrame, by default ``0``.
18921901
index_col_parent : int, optional
18931902
Column to set as index for the parent DataFrame, by default ``0``.
1903+
delimiter : str, optional
1904+
The delimiter to use when reading the dataframe. By default ``","``.
18941905
18951906
Returns
18961907
-------
@@ -1905,12 +1916,12 @@ def load_csv(
19051916
raise ValueError("[ERROR] Please pass ``patch_path`` as string.")
19061917

19071918
if os.path.isfile(parent_path):
1908-
parent_df = pd.read_csv(parent_path, index_col=index_col_parent)
1919+
parent_df = pd.read_csv(parent_path, index_col=index_col_parent, sep=delimiter)
19091920
else:
19101921
raise ValueError(f"[ERROR] {parent_path} cannot be found.")
19111922

19121923
if os.path.isfile(patch_path):
1913-
patch_df = pd.read_csv(patch_path, index_col=index_col_patch)
1924+
patch_df = pd.read_csv(patch_path, index_col=index_col_patch, sep=delimiter)
19141925
else:
19151926
raise ValueError(f"[ERROR] {patch_path} cannot be found.")
19161927

0 commit comments

Comments (0)