Merge pull request #10 from Project-AgML/dev

amogh7joshi · web-flow · commit 06730f3c2b52 · 2022-02-16T18:40:28.000-05:00
Release 0.2.5
diff --git a/agml/_assets/public_datasources.json b/agml/_assets/public_datasources.json
@@ -799,5 +799,98 @@
                 0.20582404732704163
             ]
         }
+    },
+    "guava_disease_pakistan": {
+        "classes": {
+            "1": "Canker",
+            "2": "Dot",
+            "3": "Mummification",
+            "4": "Rust"
+        },
+        "ml_task": "image_classification",
+        "ag_task": "disease_classification",
+        "location": {
+            "continent": "asia",
+            "country": "pakistan"
+        },
+        "sensor_modality": "rgb",
+        "platform": "ground",
+        "input_data_format": "jpg",
+        "annotation_format": "directory_names",
+        "n_images": "306",
+        "docs_url": "https://data.mendeley.com/datasets/s8x6jn5cvr/1",
+        "external_image_sources": [],
+        "stats": {
+            "mean": [
+                0.44554805755615234,
+                0.4508753716945648,
+                0.3228892683982849
+            ],
+            "std": [
+                0.18661099672317505,
+                0.18359656631946564,
+                0.18744423985481262
+            ]
+        }
+    },
+    "apple_detection_spain": {
+        "classes": {
+            "1": "apple"
+        },
+        "ml_task": "object_detection",
+        "ag_task": "fruit_detection",
+        "location": {
+            "continent": "europe",
+            "country": "spain"
+        },
+        "sensor_modality": "rgb",
+        "platform": "ground",
+        "input_data_format": "jpg",
+        "annotation_format": "coco_json",
+        "n_images": "967",
+        "docs_url": "https://www.grap.udl.cat/en/publications/KFuji_RGBDS_database.html",
+        "external_image_sources": [],
+        "stats": {
+            "mean": [
+                0.3745550215244293,
+                0.4699203670024872,
+                0.3931747078895569
+            ],
+            "std": [
+                0.2724320888519287,
+                0.27973315119743347,
+                0.2829023003578186
+            ]
+        }
+    },
+    "apple_detection_drone_brazil": {
+        "classes": {
+            "1": "apple"
+        },
+        "ml_task": "object_detection",
+        "ag_task": "fruit_detection",
+        "location": {
+            "continent": "south_america",
+            "country": "brazil"
+        },
+        "sensor_modality": "rgb",
+        "platform": "aerial",
+        "input_data_format": "jpg",
+        "annotation_format": "coco_json",
+        "n_images": "689",
+        "docs_url": "https://github.com/thsant/add256/tree/zenodo-1.0",
+        "external_image_sources": [],
+        "stats": {
+            "mean": [
+                0.3362397849559784,
+                0.48177945613861084,
+                0.32026395201683044
+            ],
+            "std": [
+                0.18435567617416382,
+                0.1873108297586441,
+                0.1734534204006195
+            ]
+        }
     }
 }
diff --git a/agml/_assets/shape_info.pickle b/agml/_assets/shape_info.pickle
diff --git a/agml/_assets/source_citations.json b/agml/_assets/source_citations.json
@@ -86,5 +86,17 @@
     "grape_detection_californianight": {
         "license": "",
         "citation": "@misc{GrapeNight,\n  author    = {Plant AI and Biophysics Lab},\n  title     = {Grape Detection 2020 Night},\n  year      = {2020},\n  url       = {https://github.com/plant-ai-biophysics-lab/AgML} \n "
+    },
+    "guava_disease_pakistan": {
+        "license": "",
+        "citation": "@article{Rauf_Lali_2021, \n    title={A Guava Fruits and Leaves Dataset for Detection and Classification of Guava Diseases through Machine Learning}, \n    volume={1}, \n    url={https://data.mendeley.com/datasets/s8x6jn5cvr/1}, \n    DOI={10.17632/s8x6jn5cvr.1}, \n    abstractNote={(1) Plant diseases are the primary cause of reduced productivity in agriculture, which results in economic losses. Guava is a big source of nutrients for humans all over the world. Guava diseases, on the other hand, harm the yield and quality of the crop. (2) For the identification and classification of plant diseases, computer vision and image processing methods have been commonly used. (3) The dataset includes an image gallery of healthy and unhealthy Guava fruits and leaves that could be used by researchers to adopt advanced computer vision techniques to protect plants from disease. Dot, Canker, Mummification, and Rust are the diseases targeted in the data sets. (4) The dataset contains 306 images of healthy and unhealthy images for both Guava fruits and leaves collectively. Each image contains 6000 * 4000 dimensions with 300 dpi resolution. (5) All images were acquired from the tropical areas of Pakistan under the supervision of Prof. Dr. Ikramullah Lali. (6) All images were annotated manually by the domain expert such as For Guava fruits and leaves; Dot (76), Canker (77), Mummification (83), and Rust (70) Note: The data labeling was manual and can be updated by automatic labeling through machine learning. In the meantime, the authors can also use the data set for the clustering problem.}, \n    author={Rauf, Hafiz Tayyab and Lali, Muhammad Ikram Ullah}, \n    year={2021}, month={Apr} \n}\n"
+    },
+    "apple_detection_spain": {
+        "license": "",
+        "citation": "@article{GENEMOLA2019104289,\ntitle = {KFuji RGB-DS database: Fuji apple multi-modal images for fruit detection with color, depth and range-corrected IR data},\njournal = {Data in Brief},\nvolume = {25},\npages = {104289},\nyear = {2019},\nissn = {2352-3409},\ndoi = {https://doi.org/10.1016/j.dib.2019.104289},\nurl = {https://www.sciencedirect.com/science/article/pii/S2352340919306432},\nauthor = {Jordi Gené-Mola and Verónica Vilaplana and Joan R. Rosell-Polo and Josep-Ramon Morros and Javier Ruiz-Hidalgo and Eduard Gregorio},\nkeywords = {Multi-modal dataset, Fruit detection, Depth cameras, RGB-D, Fruit reflectance, Fuji apple},\nabstract = {This article contains data related to the research article entitle “Multi-modal Deep Learning for Fruit Detection Using RGB-D Cameras and their Radiometric Capabilities” [1]. The development of reliable fruit detection and localization systems is essential for future sustainable agronomic management of high-value crops. RGB-D sensors have shown potential for fruit detection and localization since they provide 3D information with color data. However, the lack of substantial datasets is a barrier for exploiting the use of these sensors. This article presents the KFuji RGB-DS database which is composed by 967 multi-modal images of Fuji apples on trees captured using Microsoft Kinect v2 (Microsoft, Redmond, WA, USA). Each image contains information from 3 different modalities: color (RGB), depth (D) and range corrected IR intensity (S). Ground truth fruit locations were manually annotated, labeling a total of 12,839 apples in all the dataset. The current dataset is publicly available at http://www.grap.udl.cat/publicacions/datasets.html.}\n}"
+    },
+    "apple_detection_drone_brazil": {
+        "license": "CC BY-SA 4.0",
+        "citation": "@article{DBLP:journals/corr/abs-2110-12331,\n  author    = {Thiago T. Santos and\n               Luciano Gebler},\n  title     = {A methodology for detection and localization of fruits in apples orchards\n               from aerial images},\n  journal   = {CoRR},\n  volume    = {abs/2110.12331},\n  year      = {2021},\n  url       = {https://arxiv.org/abs/2110.12331},\n  eprinttype = {arXiv},\n  eprint    = {2110.12331},\n  timestamp = {Thu, 28 Oct 2021 15:25:31 +0200},\n  biburl    = {https://dblp.org/rec/journals/corr/abs-2110-12331.bib},\n  bibsource = {dblp computer science bibliography, https://dblp.org}\n}"
     }
 }
diff --git a/agml/_internal/preprocess.py b/agml/_internal/preprocess.py
@@ -380,7 +380,7 @@ def cotton_seedling_counting(self, dataset_name):
            if path not in valid_paths:
                continue
            shutil.copyfile(path, os.path.join(processed_img_dir, os.path.basename(path)))
-       with open(os.path.join(processed_dir, 'labels.json'), 'w') as f:
+       with open(os.path.join(processed_dir, 'annotations.json'), 'w') as f:
            json.dump(all_annotation_data, f, indent = 4)
 
        # Zip the dataset
@@ -573,5 +573,159 @@ def autonomous_greenhouse_regression(self, dataset_name):
         # Save the annotation file.
         with open(os.path.join(out_dir, 'annotations.json'), 'w') as f:
             json.dump(out, f)
+    
+    def guava_disease_pakistan(self, dataset_name):
+        # Get all of the images.
+        dataset_dir = os.path.join(self.data_original_dir, dataset_name)
+        classes = os.listdir(dataset_dir)
+        all_images = []
+        for cls in classes:
+            all_images.extend([
+                os.path.join(dataset_dir, cls, i)
+                for i in os.listdir(os.path.join(dataset_dir, cls))])
+
+        # Resize all of the images.
+        out_dir = os.path.join(self.data_processed_dir, dataset_name)
+        os.makedirs(out_dir, exist_ok = True)
+        for cls in classes:
+            os.makedirs(os.path.join(out_dir, cls), exist_ok = True)
+        for image in tqdm(all_images, 'Resizing Images'):
+            out_image = image.replace('/original/', '/processed/')
+            im = cv2.imread(image, cv2.IMREAD_UNCHANGED)
+            im = cv2.resize(im, (im.shape[1] // 5, im.shape[0] // 5), cv2.INTER_LINEAR)
+            cv2.imwrite(out_image, im)
+
+    def apple_detection_spain(self, dataset_name):
+        # resize the dataset
+        resize = 1.0
+
+        # Read public_datasources.json to get class information
+        datasource_file = os.path.join(os.path.dirname(__file__), "../_assets/public_datasources.json")
+        with open(datasource_file) as f:
+            data = json.load(f)
+            category_info = data[dataset_name]['crop_types']
+            labels_str = []
+            labels_ids = []
+            for info in category_info:
+                labels_str.append(category_info[info])
+                labels_ids.append(int(info))
+
+            name_converter = dict(zip(["Poma"], ["apple"]))  # src -> dst
+            label2id = dict(zip(labels_str, labels_ids))
+
+        dataset_dir = os.path.join(self.data_original_dir, dataset_name)
+        ann_dir = os.path.join(dataset_dir, "preprocessed data/square_annotations1")
+
+        # Get image file and xml file
+        all_files = get_file_list(ann_dir)
+        anno_files = [os.path.join(ann_dir, x) for x in all_files if "xml" in x]
+        img_files = [x.replace(".xml", "hr.jpg").replace("square_annotations1", "images") for x in anno_files]
+
+        # Process annotation files
+        save_dir_anno = os.path.join(self.data_processed_dir, dataset_name, 'annotations')
+        create_dir(save_dir_anno)
+        output_json_file = os.path.join(save_dir_anno, 'instances.json')
+
+        # Process image files
+        output_img_path = os.path.join(self.data_processed_dir, dataset_name, 'images')
+        create_dir(output_img_path)
+
+        general_info = {
+            "description": "KFuji RGB-DS database",
+            "url": "http://www.grap.udl.cat/en/publications/KFuji_RGBDS_database.html",
+            "version": "1.0",
+            "year": 2018,
+            "contributor": "Gené-Mola J, Vilaplana V, Rosell-Polo JR, Morros JR, Ruiz-Hidalgo J, Gregorio E",
+            "date_created": "2018/10/19"
+        }
+
+        convert_xmls_to_cocojson(
+            general_info,
+            annotation_paths = anno_files,
+            img_paths = img_files,
+            label2id = label2id,
+            name_converter = name_converter,
+            output_jsonpath = output_json_file,
+            output_imgpath = output_img_path,
+            extract_num_from_imgid = True
+        )
+
+    def apple_detection_drone_brazil(self, dataset_name):
+        # Get the data directory and rename it if necessary.
+        dataset_dir = os.path.join(self.data_original_dir, dataset_name)
+        if not os.path.exists(dataset_dir):
+            fallback = os.path.join(self.data_original_dir,
+                                    'thsant-add256-68d2f88') # noqa
+            if os.path.exists(fallback):
+                os.rename(fallback, dataset_dir)
+
+        # Get all of the images which have valid annotations.
+        with open(os.path.join(dataset_dir, 'all.json'), 'r') as f:
+            original_annotations = json.load(f)
+        valid_annotations = {k: v for k, v in
+                             original_annotations.items() if v != []}
+
+        # Construct the `images` part of the COCO JSON.
+        image_coco = []
+        image_id_map = {}
+        image_dir = os.path.join(dataset_dir, 'images')
+        for idx, image_name in tqdm(
+                enumerate(valid_annotations.keys()),
+                desc = "Parsing Images", total = len(valid_annotations)):
+            height, width = cv2.imread(os.path.join(image_dir, image_name)).shape[:2]
+            image_coco.append(
+                {'file_name': image_name, 'height': height,
+                 'width': width, 'id': idx})
+            image_id_map[image_name] = idx
+
+        # Construct the `annotations` part of the COCO JSON.
+        annotation_idx = 0
+        annotation_coco = []
+        for image_name, annotation_list in valid_annotations.items():
+            for annotation in annotation_list:
+                # Coordinates are in form (center_x, center_y, radius). We convert
+                # these to (top left x, top left y, width, height)
+                x_c, y_c, r = annotation['cx'], annotation['cy'], annotation['r']
+                x, y = x_c - r, y_c - r
+                w = h = r * 2
+                annotation_coco.append({
+                    'area': w * h, 'iscrowd': 0, 'bbox': [x, y, w, h],
+                    'category_id': 1, 'ignore': 0, 'segmentation': 0,
+                    'image_id': image_id_map[image_name], 'id': annotation_idx})
+                annotation_idx += 1
+
+        # Set up the annotation dictionary.
+        category_info = [{'supercategory': 'none', 'id': 1, 'name': 'apple'}]
+        all_annotation_data = {
+            "images": image_coco, "type": "instances",
+            "annotations": annotation_coco, "categories": category_info,
+            "info": {
+                "description": "apple detection dataset with drone imagery",
+                "url": "https://github.com/thsant/add256/tree/zenodo-1.0",
+                "version": "1.0",
+                "year": 2021,
+                "contributor": "Thiago T. Santos and Luciano Gebler",
+                "date_created": "2021/10/2021"
+            }
+        }
+
+        # Recreate the dataset and zip it
+        processed_dir = os.path.join(self.data_processed_dir, dataset_name)
+        processed_img_dir = os.path.join(processed_dir, 'images')
+        if os.path.exists(processed_dir):
+            shutil.rmtree(processed_dir)
+        os.makedirs(processed_dir, exist_ok = True)
+        os.makedirs(processed_img_dir, exist_ok = True)
+        for path in tqdm(valid_annotations.keys(), desc = "Moving Images"):
+            full_path = os.path.join(image_dir, path)
+            shutil.copyfile(full_path, os.path.join(
+                processed_img_dir, os.path.basename(path)))
+        with open(os.path.join(processed_dir, 'annotations.json'), 'w') as f:
+            json.dump(all_annotation_data, f)
+
+
+
+
 
+PublicDataPreprocessor('../../data_new').preprocess('apple_detection_drone_brazil')
 
diff --git a/agml/data/builder.py b/agml/data/builder.py
@@ -65,6 +65,7 @@ def from_data(cls, contents, info, root):
         obj._info_map = info.class_to_num
         obj._dataset_root = root
         obj._data = contents
+        obj._external_image_sources = info.external_image_sources
         return obj
 
     @property