Skip to content

Commit bd6e71e

Browse files
committed
v0.6.1 merge conflict fixes
2 parents 07020ea + a9fb9f6 commit bd6e71e

10 files changed

+188
-31
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ model.run_training(loader)
156156
[ghai_broccoli_detection](https://github.com/Project-AgML/AgML/blob/main/docs/datasets/ghai_broccoli_detection.md) | Object Detection | 500 |
157157
[bean_synthetic_earlygrowth_aerial](https://github.com/Project-AgML/AgML/blob/main/docs/datasets/bean_synthetic_earlygrowth_aerial.md) | Semantic Segmentation | 2500 |
158158
[ghai_strawberry_fruit_detection](https://github.com/Project-AgML/AgML/blob/main/docs/datasets/ghai_strawberry_fruit_detection.md) | Object Detection | 500 |
159+
[vegann_multicrop_presence_segmentation](https://github.com/Project-AgML/AgML/blob/main/docs/datasets/vegann_multicrop_presence_segmentation.md) | Semantic Segmentation | 3775 |
159160

160161
## Usage Information
161162

@@ -186,4 +187,4 @@ a bug or feature that you would like to see implemented, please don't hesitate t
186187
See the [contributing guidelines](/CONTRIBUTING.md) for more information.
187188

188189
## Funding
189-
This project is partly funded by the [National AI Institute for Food Systems (AIFS)](https://aifs.ucdavis.ed
190+
This project is partly funded by the [National AI Institute for Food Systems (AIFS)](https://aifs.ucdavis.edu).

agml/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
__version__ = '0.6.0'
15+
__version__ = '0.6.1'
1616
__all__ = ['data', 'synthetic', 'backend', 'viz', 'io']
1717

1818

agml/_assets/public_datasources.json

Lines changed: 49 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -254,31 +254,30 @@
254254
"annotation_format": "directory_names",
255255
"n_images": "17509",
256256
"docs_url": "https://github.com/AlexOlsen/DeepWeeds",
257+
"classes": {
258+
"0": "Chinee Apple",
259+
"1": "Lantana",
260+
"2": "Parkinsonia",
261+
"3": "Parthenium",
262+
"4": "Prickly Acacia",
263+
"5": "Rubber Vine",
264+
"6": "Siam Weed",
265+
"7": "Snake Weed",
266+
"8": "Negative"
267+
},
268+
"external_image_sources": [],
257269
"stats": {
258270
"mean": [
259-
0.3785816431045532,
260-
0.38957422971725464,
261-
0.3797682821750641
271+
0.3785804808139801,
272+
0.3895738422870636,
273+
0.37976858019828796
262274
],
263275
"std": [
264-
0.22421954572200775,
265-
0.22450360655784607,
266-
0.22274591028690338
276+
0.22421938180923462,
277+
0.2245042622089386,
278+
0.22274629771709442
267279
]
268-
},
269-
"classes": {
270-
"0": "no_weeds",
271-
"1": "chinee_apple",
272-
"2": "lantana",
273-
"3": "parkinsonia",
274-
"4": "parthenium",
275-
"5": "prickly_acacia",
276-
"6": "rubber_vine",
277-
"7": "siam_weed",
278-
"8": "snake_weed",
279-
"9": "negative"
280-
},
281-
"external_image_sources": []
280+
}
282281
},
283282
"fruit_detection_worldwide": {
284283
"ml_task": "object_detection",
@@ -1270,5 +1269,35 @@
12701269
0.18042609095573425
12711270
]
12721271
}
1272+
},
1273+
"vegann_multicrop_presence_segmentation": {
1274+
"classes": {
1275+
"1": "plant"
1276+
},
1277+
"ml_task": "semantic_segmentation",
1278+
"ag_task": "vegetation_segmentation",
1279+
"location": {
1280+
"continent": "worldwide",
1281+
"country": "worldwide"
1282+
},
1283+
"sensor_modality": "rgb",
1284+
"real_synthetic": "real",
1285+
"platform": "aerial",
1286+
"input_data_format": "png",
1287+
"annotation_format": "image",
1288+
"n_images": "3775",
1289+
"docs_url": "https://zenodo.org/records/7636408",
1290+
"stats": {
1291+
"mean": [
1292+
16.39170265197754,
1293+
18.68538475036621,
1294+
14.235136985778809
1295+
],
1296+
"std": [
1297+
4.590310573577881,
1298+
5.818269729614258,
1299+
4.1895012855529785
1300+
]
1301+
}
12731302
}
12741303
}

agml/_assets/shape_info.pickle

208 Bytes
Binary file not shown.

agml/_assets/source_citations.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,5 +142,9 @@
142142
"ghai_strawberry_fruit_detection": {
143143
"license": "CC BY-SA 4.0",
144144
"citation": ""
145+
},
146+
"vegann_multicrop_presence_segmentation": {
147+
"license": "CC BY-SA 4.0",
148+
"citation": " @article{Madec_Irfan_Velumani_Baret_David_Daubige_Samatan_Serouart_Smith_James_et al._2023, title={VegAnn, Vegetation Annotation of multi-crop RGB images acquired under diverse conditions for segmentation}, volume={10}, ISSN={2052-4463}, url={https://www.nature.com/articles/s41597-023-02098-y}, DOI={10.1038/s41597-023-02098-y}, abstractNote={Abstract\n \n Applying deep learning to images of cropping systems provides new knowledge and insights in research and commercial applications. Semantic segmentation or pixel-wise classification, of RGB images acquired at the ground level, into vegetation and background is a critical step in the estimation of several canopy traits. Current state of the art methodologies based on convolutional neural networks (CNNs) are trained on datasets acquired under controlled or indoor environments. These models are unable to generalize to real-world images and hence need to be fine-tuned using new labelled datasets. This motivated the creation of the VegAnn -\n Veg\n etation\n Ann\n otation - dataset, a collection of 3775 multi-crop RGB images acquired for different phenological stages using different systems and platforms in diverse illumination conditions. We anticipate that VegAnn will help improving segmentation algorithm performances, facilitate benchmarking and promote large-scale crop vegetation segmentation research.}, number={1}, journal={Scientific Data}, author={Madec, Simon and Irfan, Kamran and Velumani, Kaaviya and Baret, Frederic and David, Etienne and Daubige, Gaetan and Samatan, Lucas Bernigaud and Serouart, Mario and Smith, Daniel and James, Chrisbin and Camacho, Fernando and Guo, Wei and De Solan, Benoit and Chapman, Scott C. and Weiss, Marie}, year={2023}, month=may, pages={302}, language={en} }\n"
145149
}
146150
}

agml/_internal/preprocess.py

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,10 @@ def rangeland_weeds_australia(self, dataset_name):
112112
os.makedirs(os.path.join(
113113
processed_dir, unique_label.title()), exist_ok = True)
114114
for file in tqdm(images, desc = "Moving Images", file = sys.stdout):
115-
save_dir = df.loc[df['Filename'] == file]['Species'].values[0].title()
115+
save_dir = df.loc[df['Filename'] == os.path.basename(file)]['Species'].values[0].title()
116116
shutil.copyfile(
117117
os.path.join(dataset_dir, 'images', file),
118-
os.path.join(processed_dir, save_dir, file)
118+
os.path.join(processed_dir, save_dir, os.path.basename(file))
119119
)
120120

121121
def fruit_detection_worldwide(self, dataset_name):
@@ -1029,6 +1029,62 @@ def ghai_strawberry_fruit_detection(self, dataset_name):
10291029
shutil.move(os.path.join(original_dir, 'coco.json'),
10301030
os.path.join(processed_dir, 'annotations.json'))
10311031

1032+
def vegann_multicrop_presence_segmentation(self, dataset_name):
1033+
# Create processed directories
1034+
original_dir = os.path.join(self.data_original_dir, dataset_name)
1035+
processed_dir = os.path.join(self.data_processed_dir, dataset_name)
1036+
processed_image_dir = os.path.join(processed_dir, 'images')
1037+
os.makedirs(processed_image_dir, exist_ok = True)
1038+
processed_annotation_dir = os.path.join(processed_dir, 'annotations')
1039+
os.makedirs(processed_annotation_dir, exist_ok = True)
1040+
1041+
# Move images
1042+
for image in tqdm(glob.glob(os.path.join(original_dir, 'images', '*.png'))):
1043+
shutil.copyfile(image, os.path.join(processed_image_dir, os.path.basename(image)))
1044+
1045+
# Read annotations
1046+
for annotation_file in tqdm(glob.glob(os.path.join(original_dir, 'annotations', '*.png'))):
1047+
annotation = cv2.imread(annotation_file, cv2.IMREAD_UNCHANGED)
1048+
annotation = np.where(annotation == 255, 1, 0)
1049+
cv2.imwrite(os.path.join(processed_annotation_dir,
1050+
os.path.basename(annotation_file)), annotation)
1051+
1052+
# Read the CSV file containing the splits
1053+
split_csv = pd.read_csv(os.path.join(original_dir, 'VegAnn_dataset.csv'), sep=';')
1054+
1055+
# Get the `Name` and `TVT-split{n}` columns for each n, and save the splits to a folder
1056+
splits_folder = os.path.join(processed_dir, '.splits')
1057+
os.makedirs(splits_folder, exist_ok = True)
1058+
column_pairs = [['Name', f'TVT-split{i}'] for i in range(1, 5 + 1)]
1059+
1060+
splits = {}
1061+
for column_pair in column_pairs:
1062+
columns = split_csv[column_pair]
1063+
train_images = columns[columns[column_pair[1]] == 'Training']['Name']
1064+
test_images = columns[columns[column_pair[1]] == 'Test']['Name']
1065+
splits[column_pair[1]] = {
1066+
'train': {os.path.join('images', i): os.path.join('annotations', i) for i in train_images},
1067+
'val': {},
1068+
'test': {os.path.join('images', i): os.path.join('annotations', i) for i in test_images}
1069+
}
1070+
1071+
# Save each split to a JSON file
1072+
for split_name, split in splits.items():
1073+
with open(os.path.join(splits_folder, f'{split_name}.json'), 'w') as f:
1074+
json.dump(split, f)
1075+
1076+
1077+
1078+
1079+
1080+
1081+
1082+
1083+
1084+
1085+
1086+
1087+
10321088

10331089
if __name__ == '__main__':
10341090
# Initialize program arguments.
@@ -1045,7 +1101,7 @@ def ghai_strawberry_fruit_detection(self, dataset_name):
10451101
print("Processing dataset")
10461102
p.preprocess(args.dataset)
10471103
print("Converting dataset")
1048-
os.chdir(f'{args.data_dir}/processed')
1049-
os.system(f'zip -r {args.dataset}.zip {args.dataset} -x ".*" -x "__MACOSX"')
1104+
# os.chdir(f'{args.data_dir}/processed')
1105+
# os.system(f'zip -r {args.dataset}.zip {args.dataset} -x ".*" -x "__MACOSX"')
10501106

10511107

agml/data/loader.py

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,10 +1234,27 @@ def save_split(self, name, overwrite = False):
12341234
raise NotImplementedError("Cannot save a split of data when no "
12351235
"split has been generated.")
12361236

1237-
# Get each of the individual splits.
1238-
splits = {'train': self._train_content,
1239-
'val': self._val_content,
1240-
'test': self._test_content}
1237+
# Get each of the individual splits, and for semantic segmentation/image
1238+
# classification, remove the full paths and only save the path relative
1239+
# to the dataset root (so only the file and its directory are saved).
1240+
splits = {}
1241+
if self._info.tasks.ml == 'image_classification':
1242+
for split in ['train', 'val', 'test']:
1243+
contents = getattr(self, f'_{split}_content')
1244+
if contents is not None:
1245+
contents = {
1246+
os.path.relpath(c, self.dataset_root): v for c, v in contents.items()
1247+
}
1248+
splits[split] = contents
1249+
elif self._info.tasks.ml == 'semantic_segmentation':
1250+
for split in ['train', 'val', 'test']:
1251+
contents = getattr(self, f'_{split}_content')
1252+
if contents is not None:
1253+
contents = {
1254+
os.path.relpath(c, self.dataset_root):
1255+
os.path.relpath(v, self.dataset_root) for c, v in contents.items()
1256+
}
1257+
splits[split] = contents
12411258

12421259
# Save the split to the internal location.
12431260
split_dir = os.path.join(SUPER_BASE_DIR, 'splits', self.name)
@@ -1258,6 +1275,10 @@ def load_split(self, name, **kwargs):
12581275
use the traditional split accessors (`train_data`, `val_data`, and
12591276
`test_data`) to access the loaded data.
12601277
1278+
You can also load a pre-defined split for the dataset by using its name
1279+
(any potential such splits can be found in the dataset info, and are
1280+
derived from the original dataset).
1281+
12611282
Parameters
12621283
----------
12631284
name: str
@@ -1271,14 +1292,33 @@ def load_split(self, name, **kwargs):
12711292
# Ensure that the split exists.
12721293
split_dir = os.path.join(SUPER_BASE_DIR, 'splits', self.name)
12731294
if not os.path.exists(os.path.join(split_dir, f'{name}.json')):
1274-
raise FileNotFoundError(f"Could not find a split with the name {name}.")
1295+
split_dir = os.path.join(self.dataset_root, '.splits')
1296+
if not os.path.exists(os.path.join(split_dir, f'{name}.json')):
1297+
raise FileNotFoundError(f"Could not find a split with the name {name}.")
12751298

12761299
# Load the split from the internal location.
12771300
with open(os.path.join(split_dir, f'{name}.json'), 'r') as f:
12781301
splits = json.load(f)
12791302

12801303
# Set the split contents.
12811304
for split, content in splits.items():
1305+
# If the data is for image classification or semantic segmentation,
1306+
# then we need to re-construct the full paths to the images.
1307+
if len(content) > 0:
1308+
first_item = list(content.items())[0]
1309+
if not os.path.isabs(first_item[0]): # backwards compatibility
1310+
if self._info.tasks.ml == 'image_classification':
1311+
content = {
1312+
os.path.join(self.dataset_root, c): v for c, v in content.items()
1313+
}
1314+
elif self._info.tasks.ml == 'semantic_segmentation':
1315+
content = {
1316+
os.path.join(self.dataset_root, c): os.path.join(self.dataset_root, v)
1317+
for c, v in content.items()
1318+
}
1319+
else:
1320+
content = None
1321+
12821322
setattr(self, f'_{split}_content', content)
12831323

12841324
def batch(self, batch_size = None):

agml/viz/labels.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,11 @@ def show_images_and_labels(images,
116116
plt.setp(ax.spines.values(), visible = False)
117117
ax.set_xlabel(label)
118118

119+
# decrease the label size (if it's too big)
120+
ax.xaxis.label.set_size(8)
121+
119122
# Display and return the image.
123+
fig.tight_layout()
120124
image = convert_figure_to_image()
121125
if not kwargs.get('no_show', False):
122126
_ = display_image(image)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
2+
# `vegann_multicrop_presence_segmentation`
3+
4+
## Dataset Metadata
5+
6+
| Metadata | Value |
7+
| --- | --- |
8+
| **Classes** | plant |
9+
| **Machine Learning Task** | semantic_segmentation |
10+
| **Agricultural Task** | vegetation_segmentation |
11+
| **Location** | Worldwide |
12+
| **Sensor Modality** | RGB |
13+
| **Real or Synthetic** | real |
14+
| **Platform** | aerial |
15+
| **Input Data Format** | PNG |
16+
| **Annotation Format** | image |
17+
| **Number of Images** | 3775 |
18+
| **Documentation** | https://zenodo.org/records/7636408 |
19+
20+
21+
## Examples
22+
23+
![Example Images for vegann_multicrop_presence_segmentation](https://github.com/Project-AgML/AgML/blob/main/docs/sample_images/vegann_multicrop_presence_segmentation_examples.png)
Loading

0 commit comments

Comments (0)