Skip to content

Feat/simple augmentation #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
3 changes: 3 additions & 0 deletions src/super_gradients/common/object_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class Transforms:
DetectionPaddedRescale = "DetectionPaddedRescale"
DetectionTargetsFormatTransform = "DetectionTargetsFormatTransform"
DetectionNormalize = "DetectionNormalize"
DetectionRandomSideCrop = "DetectionRandomSideCrop"
#
RandomResizedCropAndInterpolation = "RandomResizedCropAndInterpolation"
RandAugmentTransform = "RandAugmentTransform"
Expand Down Expand Up @@ -333,6 +334,8 @@ class Dataloaders:
COCO2017_VAL_YOLOX = "coco2017_val_yolox"
COCO2017_TRAIN_YOLO_NAS = "coco2017_train_yolo_nas"
COCO2017_VAL_YOLO_NAS = "coco2017_val_yolo_nas"
COCO_DETECTION_YOLO_FORMAT_TRAIN_CUSTOM = "coco_detection_yolo_format_train_custom"
COCO_DETECTION_YOLO_FORMAT_VAL_CUSTOM = "coco_detection_yolo_format_val_custom"
COCO2017_TRAIN_PPYOLOE = "coco2017_train_ppyoloe"
COCO2017_VAL_PPYOLOE = "coco2017_val_ppyoloe"
COCO2017_TRAIN_SSD_LITE_MOBILENET_V2 = "coco2017_train_ssd_lite_mobilenet_v2"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
train_dataset_params:
data_dir: /data/coco # TO FILL: Where the data is stored.
images_dir: images/train2017 # TO FILL: Local path to directory that includes all the images. Path relative to `data_dir`. Can be the same as `labels_dir`.
labels_dir: labels/train2017 # TO FILL: Local path to directory that includes all the labels. Path relative to `data_dir`. Can be the same as `images_dir`.
classes: [ person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, traffic light, fire hydrant, stop sign,
parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe, backpack, umbrella, handbag,
tie, suitcase, frisbee, skis, snowboard, sports ball, kite, baseball bat, baseball glove, skateboard, surfboard,
tennis racket, bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange, broccoli, carrot,
hot dog, pizza, donut, cake, chair, couch, potted plant, bed, dining table, toilet, tv, laptop, mouse, remote,
keyboard, cell phone, microwave, oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear,
hair drier, toothbrush] # TO FILL: List of classes used in your dataset.
input_dim: [1024, 1024]
cache_dir:
cache: False
transforms:
- DetectionRandomSideCrop:
min_rel_width : 1e-16
max_rel_width : 0.5
p_side_right: 0.5
prob: 1.0
# - DetectionMosaic:
# input_dim: ${dataset_params.train_dataset_params.input_dim}
# prob: 1.
# - DetectionRandomAffine:
# degrees: 3. # rotation degrees, randomly sampled from [-degrees, degrees]
# translate: 0.05 # image translation fraction
# shear: 0. # shear degrees, randomly sampled from [-degrees, degrees]
# target_size: ${dataset_params.train_dataset_params.input_dim}
# filter_box_candidates: True # whether to filter out transformed bboxes by edge size, area ratio, and aspect ratio.
# wh_thr: 2 # edge size threshold when filter_box_candidates = True (pixels)
# area_thr: 0.1 # threshold for area ratio between original image and the transformed one, when filter_box_candidates = True
# ar_thr: 20 # aspect ratio threshold when filter_box_candidates = True
# - DetectionMixup:
# input_dim: ${dataset_params.train_dataset_params.input_dim}
# mixup_scale: [ 0.5, 1.5 ] # random rescale range for the additional sample in mixup
# prob: 1.0 # probability to apply per-sample mixup
# flip_prob: 0.5 # probability to apply horizontal flip
- DetectionHSV:
prob: 1.0 # probability to apply HSV transform
hgain: 18 # HSV transform hue gain (randomly sampled from [-hgain, hgain])
sgain: 30 # HSV transform saturation gain (randomly sampled from [-sgain, sgain])
vgain: 30 # HSV transform value gain (randomly sampled from [-vgain, vgain])
- DetectionHorizontalFlip:
prob: 0.3 # probability to apply horizontal flip
- DetectionRescale:
output_shape: ${dataset_params.train_dataset_params.input_dim}
swap: [2,0,1]
- DetectionStandardize:
max_value: 255.
- DetectionTargetsFormatTransform:
input_dim: ${dataset_params.train_dataset_params.input_dim}
output_format: LABEL_CXCYWH

class_inclusion_list:
max_num_samples:

train_dataloader_params:
batch_size: 25
num_workers: 8
shuffle: True
drop_last: True
pin_memory: True
collate_fn:
_target_: super_gradients.training.utils.detection_utils.DetectionCollateFN

val_dataset_params:
data_dir: /data/coco # TO FILL: Where the data is stored.
images_dir: images/val2017 # TO FILL: Local path to directory that includes all the images. Path relative to `data_dir`. Can be the same as `labels_dir`.
labels_dir: labels/val2017 # TO FILL: Local path to directory that includes all the labels. Path relative to `data_dir`. Can be the same as `images_dir`.
classes: [ person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, traffic light, fire hydrant, stop sign,
parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe, backpack, umbrella, handbag,
tie, suitcase, frisbee, skis, snowboard, sports ball, kite, baseball bat, baseball glove, skateboard, surfboard,
tennis racket, bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange, broccoli, carrot,
hot dog, pizza, donut, cake, chair, couch, potted plant, bed, dining table, toilet, tv, laptop, mouse, remote,
keyboard, cell phone, microwave, oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear,
hair drier, toothbrush] # TO FILL: List of classes used in your dataset.
input_dim: [1024, 1024]
cache_dir:
cache: False
transforms:
- DetectionRescale:
output_shape: ${dataset_params.val_dataset_params.input_dim}
swap: [2,0,1]
- DetectionStandardize:
max_value: 255.
- DetectionTargetsFormatTransform:
input_dim: ${dataset_params.val_dataset_params.input_dim}
output_format: LABEL_CXCYWH
class_inclusion_list:
max_num_samples:

val_dataloader_params:
batch_size: 25
num_workers: 8
drop_last: False
pin_memory: True
collate_fn:
_target_: super_gradients.training.utils.detection_utils.DetectionCollateFN

_convert_: all
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
train_dataset_params:
data_dir: /data/coco # root path to coco data
subdir: images/train2017 # sub directory path of data_dir containing the train data.
json_file: instances_train2017.json # path to coco train json file, data_dir/annotations/train_json_file.
input_dim: [640, 640]
cache_dir:
cache: False
transforms:
# - DetectionRandomSideCrop:
# min_rel_width : 0.3
# max_rel_width : 0.6
# p_side_right: 0.5
# prob: 0.25
- DetectionRandomAffine:
degrees: 0 # rotation degrees, randomly sampled from [-degrees, degrees]
translate: 0.25 # image translation fraction
scales: [ 0.5, 1.5 ] # random rescale range (keeps size by padding/cropping) after mosaic transform.
shear: 0.0 # shear degrees, randomly sampled from [-degrees, degrees]
target_size:
filter_box_candidates: True # whether to filter out transformed bboxes by edge size, area ratio, and aspect ratio.
wh_thr: 2 # edge size threshold when filter_box_candidates = True (pixels)
area_thr: 0.1 # threshold for area ratio between original image and the transformed one, when filter_box_candidates = True
ar_thr: 20 # aspect ratio threshold when filter_box_candidates = True
- DetectionRGB2BGR:
prob: 0.5
- DetectionHSV:
prob: 0.5 # probability to apply HSV transform
hgain: 18 # HSV transform hue gain (randomly sampled from [-hgain, hgain])
sgain: 30 # HSV transform saturation gain (randomly sampled from [-sgain, sgain])
vgain: 30 # HSV transform value gain (randomly sampled from [-vgain, vgain])
- DetectionHorizontalFlip:
prob: 0.5 # probability to apply horizontal flip
- DetectionMixup:
input_dim:
mixup_scale: [ 0.5, 1.5 ] # random rescale range for the additional sample in mixup
prob: 0.5 # probability to apply per-sample mixup
flip_prob: 0.5 # probability to apply horizontal flip
- DetectionPaddedRescale:
input_dim: [640, 640]
max_targets: 120
pad_value: 114
- DetectionStandardize:
max_value: 255.
- DetectionTargetsFormatTransform:
max_targets: 256
output_format: LABEL_CXCYWH

tight_box_rotation: False
class_inclusion_list:
max_num_samples:
with_crowd: False

train_dataloader_params:
batch_size: 25
num_workers: 8
shuffle: True
drop_last: True
pin_memory: True
collate_fn:
_target_: super_gradients.training.utils.detection_utils.DetectionCollateFN

val_dataset_params:
data_dir: /data/coco # root path to coco data
subdir: images/val2017 # sub directory path of data_dir containing the validation data.
json_file: instances_val2017.json # path to coco validation json file, data_dir/annotations/val_json_file.
input_dim: [636, 636]
cache_dir:
cache: False
transforms:
- DetectionRGB2BGR:
prob: 1
- DetectionPadToSize:
output_size: [640, 640]
pad_value: 114
- DetectionStandardize:
max_value: 255.
- DetectionImagePermute
- DetectionTargetsFormatTransform:
max_targets: 50
input_dim: [640, 640]
output_format: LABEL_CXCYWH
tight_box_rotation: False
class_inclusion_list:
max_num_samples:
with_crowd: True

val_dataloader_params:
batch_size: 25
num_workers: 8
drop_last: False
shuffle: False
pin_memory: True
collate_fn:
_target_: super_gradients.training.utils.detection_utils.CrowdDetectionCollateFN

_convert_: all
21 changes: 21 additions & 0 deletions src/super_gradients/training/dataloaders/dataloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,27 @@ def coco2017_val_yolo_nas(dataset_params: Dict = None, dataloader_params: Dict =
dataloader_params=dataloader_params,
)

@register_dataloader(Dataloaders.COCO_DETECTION_YOLO_FORMAT_TRAIN_CUSTOM)
def coco_detection_yolo_format_train_custom(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader:
    """Build the training dataloader for the custom YOLO-format COCO recipe.

    :param dataset_params:    Optional overrides for the dataset section of the recipe config.
    :param dataloader_params: Optional overrides for the dataloader section of the recipe config.
    :return:                  A configured :class:`DataLoader` over the training split.
    """
    loader_kwargs = dict(
        config_name="coco_detection_yolo_format_base_dataset_params_custom",
        dataset_cls=YoloDarknetFormatDetectionDataset,
        train=True,
        dataset_params=dataset_params,
        dataloader_params=dataloader_params,
    )
    return get_data_loader(**loader_kwargs)


@register_dataloader(Dataloaders.COCO_DETECTION_YOLO_FORMAT_VAL_CUSTOM)
def coco_detection_yolo_format_val_custom(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader:
    """Build the validation dataloader for the custom YOLO-format COCO recipe.

    :param dataset_params:    Optional overrides for the dataset section of the recipe config.
    :param dataloader_params: Optional overrides for the dataloader section of the recipe config.
    :return:                  A configured :class:`DataLoader` over the validation split.
    """
    loader_kwargs = dict(
        config_name="coco_detection_yolo_format_base_dataset_params_custom",
        dataset_cls=YoloDarknetFormatDetectionDataset,
        train=False,
        dataset_params=dataset_params,
        dataloader_params=dataloader_params,
    )
    return get_data_loader(**loader_kwargs)


@register_dataloader(Dataloaders.COCO2017_TRAIN_PPYOLOE)
def coco2017_train_ppyoloe(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def set_dataset_processing_params(
iou: Optional[float] = None,
conf: Optional[float] = None,
) -> None:

"""Set the processing parameters for the dataset.

:param class_names: (Optional) Names of the dataset the model was trained on.
Expand Down Expand Up @@ -173,6 +174,7 @@ def predict(self, images: ImageSource, iou: Optional[float] = None, conf: Option
:param fuse_model: If True, create a copy of the model, and fuse some of its layers to increase performance. This increases memory usage.
"""
pipeline = self._get_pipeline(iou=iou, conf=conf, fuse_model=fuse_model)

return pipeline(images) # type: ignore

def predict_webcam(self, iou: Optional[float] = None, conf: Optional[float] = None, fuse_model: bool = True):
Expand Down
1 change: 1 addition & 0 deletions src/super_gradients/training/pipelines/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def _generate_prediction_result_single_batch(self, images: Iterable[np.ndarray])

# Preprocess
preprocessed_images, processing_metadatas = [], []

for image in images:
preprocessed_image, processing_metadata = self.image_processor.preprocess_image(image=image.copy())
preprocessed_images.append(preprocessed_image)
Expand Down
19 changes: 19 additions & 0 deletions src/super_gradients/training/processing/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,23 @@ def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, RescaleMetada

return rescaled_image, RescaleMetadata(original_shape=image.shape[:2], scale_factor_h=scale_factor_h, scale_factor_w=scale_factor_w)

class _RescaleWithSwap(Processing, ABC):
    """Resize image to given image dimensions WITHOUT preserving aspect ratio.

    :param output_shape: (H, W) target shape.
    :param swap:         Axis permutation (e.g. [2, 0, 1]) — presumably consumed by the
                         rescale helper or subclasses to reorder image axes.
                         NOTE(review): confirm against `_rescale_image_with_swap`,
                         which is not passed `swap` here.
    """

    def __init__(self, output_shape: Tuple[int, int], swap):
        self.output_shape = output_shape
        # Bug fix: `swap` was accepted but never stored, so the requested axis order
        # was silently dropped. Keep it on the instance so it can actually be used.
        self.swap = swap

    def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, RescaleMetadata]:
        """Rescale `image` to `self.output_shape`, returning the image and rescale metadata."""
        # Per-axis scale factors (target / original); recorded so predictions can be
        # mapped back to the original image scale in postprocessing.
        scale_factor_h, scale_factor_w = self.output_shape[0] / image.shape[0], self.output_shape[1] / image.shape[1]
        rescaled_image = _rescale_image_with_swap(image, target_shape=self.output_shape)

        return rescaled_image, RescaleMetadata(original_shape=image.shape[:2], scale_factor_h=scale_factor_h, scale_factor_w=scale_factor_w)



class _LongestMaxSizeRescale(Processing, ABC):
"""Resize image to given image dimensions WITH preserving aspect ratio.
Expand All @@ -249,9 +266,11 @@ def preprocess_image(self, image: np.ndarray) -> Tuple[np.ndarray, RescaleMetada
class DetectionRescale(_Rescale):
    def postprocess_predictions(self, predictions: DetectionPrediction, metadata: RescaleMetadata) -> DetectionPrediction:
        """Map predicted boxes back to the original image scale recorded in `metadata`."""
        # Invert the preprocessing rescale by dividing out the factors applied on the way in.
        inverse_factors = (1 / metadata.scale_factor_h, 1 / metadata.scale_factor_w)
        predictions.bboxes_xyxy = _rescale_bboxes(targets=predictions.bboxes_xyxy, scale_factors=inverse_factors)
        return predictions



@register_processing(Processings.DetectionLongestMaxSizeRescale)
class DetectionLongestMaxSizeRescale(_LongestMaxSizeRescale):
def postprocess_predictions(self, predictions: DetectionPrediction, metadata: RescaleMetadata) -> DetectionPrediction:
Expand Down
Loading