Skip to content

create class #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/super_gradients/common/object_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class Transforms:
DetectionPaddedRescale = "DetectionPaddedRescale"
DetectionTargetsFormatTransform = "DetectionTargetsFormatTransform"
DetectionNormalize = "DetectionNormalize"
DetectionRandomSideCrop = "DetectionRandomSideCrop"
#
RandomResizedCropAndInterpolation = "RandomResizedCropAndInterpolation"
RandAugmentTransform = "RandAugmentTransform"
Expand Down Expand Up @@ -333,6 +334,8 @@ class Dataloaders:
COCO2017_VAL_YOLOX = "coco2017_val_yolox"
COCO2017_TRAIN_YOLO_NAS = "coco2017_train_yolo_nas"
COCO2017_VAL_YOLO_NAS = "coco2017_val_yolo_nas"
COCO_DETECTION_YOLO_FORMAT_TRAIN_CUSTOM = "coco_detection_yolo_format_train_custom"
COCO_DETECTION_YOLO_FORMAT_VAL_CUSTOM = "coco_detection_yolo_format_val_custom"
COCO2017_TRAIN_PPYOLOE = "coco2017_train_ppyoloe"
COCO2017_VAL_PPYOLOE = "coco2017_val_ppyoloe"
COCO2017_TRAIN_SSD_LITE_MOBILENET_V2 = "coco2017_train_ssd_lite_mobilenet_v2"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@

train_dataset_params:
data_dir: /data/coco # TO FILL: Where the data is stored.
images_dir: images/train2017 # TO FILL: Local path to directory that includes all the images. Path relative to `data_dir`. Can be the same as `labels_dir`.
labels_dir: labels/train2017 # TO FILL: Local path to directory that includes all the labels. Path relative to `data_dir`. Can be the same as `images_dir`.
classes: [ person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, traffic light, fire hydrant, stop sign,
parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe, backpack, umbrella, handbag,
tie, suitcase, frisbee, skis, snowboard, sports ball, kite, baseball bat, baseball glove, skateboard, surfboard,
tennis racket, bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange, broccoli, carrot,
hot dog, pizza, donut, cake, chair, couch, potted plant, bed, dining table, toilet, tv, laptop, mouse, remote,
keyboard, cell phone, microwave, oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear,
hair drier, toothbrush] # TO FILL: List of classes used in your dataset.
input_dim: [640, 640]
cache_dir:
cache: False
transforms:
- DetectionRandomSideCrop:
min_rel_width : 0.3
max_rel_width : 0.6
p_side_right: 0.5
prob: 0.25
# - DetectionMosaic:
# input_dim: ${dataset_params.train_dataset_params.input_dim}
# prob: 1.
- DetectionRandomAffine:
degrees: 10. # rotation degrees, randomly sampled from [-degrees, degrees]
translate: 0.1 # image translation fraction
scales: [ 0.1, 2 ] # random rescale range (keeps size by padding/cropping) after mosaic transform.
shear: 2.0 # shear degrees, randomly sampled from [-degrees, degrees]
target_size: ${dataset_params.train_dataset_params.input_dim}
filter_box_candidates: True # whether to filter out transformed bboxes by edge size, area ratio, and aspect ratio.
wh_thr: 2 # edge size threshold when filter_box_candidates = True (pixels)
      area_thr: 0.1 # threshold for area ratio between original image and the transformed one, when filter_box_candidates = True
ar_thr: 20 # aspect ratio threshold when filter_box_candidates = True
- DetectionMixup:
input_dim: ${dataset_params.train_dataset_params.input_dim}
mixup_scale: [ 0.5, 1.5 ] # random rescale range for the additional sample in mixup
prob: 1.0 # probability to apply per-sample mixup
flip_prob: 0.5 # probability to apply horizontal flip
- DetectionHSV:
prob: 1.0 # probability to apply HSV transform
hgain: 5 # HSV transform hue gain (randomly sampled from [-hgain, hgain])
sgain: 30 # HSV transform saturation gain (randomly sampled from [-sgain, sgain])
vgain: 30 # HSV transform value gain (randomly sampled from [-vgain, vgain])
- DetectionHorizontalFlip:
prob: 0.5 # probability to apply horizontal flip
- DetectionPaddedRescale:
input_dim: ${dataset_params.train_dataset_params.input_dim}
max_targets: 120
- DetectionTargetsFormatTransform:
input_dim: ${dataset_params.train_dataset_params.input_dim}
output_format: LABEL_CXCYWH
class_inclusion_list:
max_num_samples:

train_dataloader_params:
batch_size: 25
num_workers: 8
shuffle: True
drop_last: True
pin_memory: True
collate_fn:
_target_: super_gradients.training.utils.detection_utils.DetectionCollateFN

val_dataset_params:
data_dir: /data/coco # TO FILL: Where the data is stored.
images_dir: images/val2017 # TO FILL: Local path to directory that includes all the images. Path relative to `data_dir`. Can be the same as `labels_dir`.
labels_dir: labels/val2017 # TO FILL: Local path to directory that includes all the labels. Path relative to `data_dir`. Can be the same as `images_dir`.
classes: [ person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, traffic light, fire hydrant, stop sign,
parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe, backpack, umbrella, handbag,
tie, suitcase, frisbee, skis, snowboard, sports ball, kite, baseball bat, baseball glove, skateboard, surfboard,
tennis racket, bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange, broccoli, carrot,
hot dog, pizza, donut, cake, chair, couch, potted plant, bed, dining table, toilet, tv, laptop, mouse, remote,
keyboard, cell phone, microwave, oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear,
hair drier, toothbrush] # TO FILL: List of classes used in your dataset.
input_dim: [640, 640]
cache_dir:
cache: False
transforms:
- DetectionPaddedRescale:
input_dim: ${dataset_params.val_dataset_params.input_dim}
- DetectionTargetsFormatTransform:
max_targets: 50
input_dim: ${dataset_params.val_dataset_params.input_dim}
output_format: LABEL_CXCYWH
class_inclusion_list:
max_num_samples:

val_dataloader_params:
batch_size: 25
num_workers: 8
drop_last: False
pin_memory: True
collate_fn:
_target_: super_gradients.training.utils.detection_utils.DetectionCollateFN

_convert_: all
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
train_dataset_params:
data_dir: /data/coco # root path to coco data
subdir: images/train2017 # sub directory path of data_dir containing the train data.
json_file: instances_train2017.json # path to coco train json file, data_dir/annotations/train_json_file.
input_dim: [640, 640]
cache_dir:
cache: False
transforms:
- DetectionRandomSideCrop:
min_rel_width : 0.3
max_rel_width : 0.6
p_side_right: 0.5
prob: 0.25
- DetectionRandomAffine:
degrees: 0 # rotation degrees, randomly sampled from [-degrees, degrees]
translate: 0.25 # image translation fraction
scales: [ 0.5, 1.5 ] # random rescale range (keeps size by padding/cropping) after mosaic transform.
shear: 0.0 # shear degrees, randomly sampled from [-degrees, degrees]
target_size:
filter_box_candidates: True # whether to filter out transformed bboxes by edge size, area ratio, and aspect ratio.
wh_thr: 2 # edge size threshold when filter_box_candidates = True (pixels)
      area_thr: 0.1 # threshold for area ratio between original image and the transformed one, when filter_box_candidates = True
ar_thr: 20 # aspect ratio threshold when filter_box_candidates = True
- DetectionRGB2BGR:
prob: 0.5
- DetectionHSV:
prob: 0.5 # probability to apply HSV transform
hgain: 18 # HSV transform hue gain (randomly sampled from [-hgain, hgain])
sgain: 30 # HSV transform saturation gain (randomly sampled from [-sgain, sgain])
vgain: 30 # HSV transform value gain (randomly sampled from [-vgain, vgain])
- DetectionHorizontalFlip:
prob: 0.5 # probability to apply horizontal flip
- DetectionMixup:
input_dim:
mixup_scale: [ 0.5, 1.5 ] # random rescale range for the additional sample in mixup
prob: 0.5 # probability to apply per-sample mixup
flip_prob: 0.5 # probability to apply horizontal flip
- DetectionPaddedRescale:
input_dim: [640, 640]
max_targets: 120
pad_value: 114
- DetectionStandardize:
max_value: 255.
- DetectionTargetsFormatTransform:
max_targets: 256
output_format: LABEL_CXCYWH

tight_box_rotation: False
class_inclusion_list:
max_num_samples:
with_crowd: False

train_dataloader_params:
batch_size: 25
num_workers: 8
shuffle: True
drop_last: True
pin_memory: True
collate_fn:
_target_: super_gradients.training.utils.detection_utils.DetectionCollateFN

val_dataset_params:
data_dir: /data/coco # root path to coco data
  subdir: images/val2017 # sub directory path of data_dir containing the validation data.
  json_file: instances_val2017.json # path to coco validation json file, data_dir/annotations/json_file.
input_dim: [636, 636]
cache_dir:
cache: False
transforms:
- DetectionRGB2BGR:
prob: 1
- DetectionPadToSize:
output_size: [640, 640]
pad_value: 114
- DetectionStandardize:
max_value: 255.
- DetectionImagePermute
- DetectionTargetsFormatTransform:
max_targets: 50
input_dim: [640, 640]
output_format: LABEL_CXCYWH
tight_box_rotation: False
class_inclusion_list:
max_num_samples:
with_crowd: True

val_dataloader_params:
batch_size: 25
num_workers: 8
drop_last: False
shuffle: False
pin_memory: True
collate_fn:
_target_: super_gradients.training.utils.detection_utils.CrowdDetectionCollateFN

_convert_: all
21 changes: 21 additions & 0 deletions src/super_gradients/training/dataloaders/dataloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,27 @@ def coco2017_val_yolo_nas(dataset_params: Dict = None, dataloader_params: Dict =
dataloader_params=dataloader_params,
)

@register_dataloader(Dataloaders.COCO_DETECTION_YOLO_FORMAT_TRAIN_CUSTOM)
def coco_detection_yolo_format_train_custom(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader:
    """Build the training dataloader for a custom YOLO-(Darknet)-format detection dataset.

    :param dataset_params:    Overrides for the dataset recipe params; None keeps the recipe defaults.
    :param dataloader_params: Overrides for the dataloader recipe params; None keeps the recipe defaults.
    :return: A configured torch DataLoader over the training split.
    """
    return get_data_loader(
        dataset_cls=YoloDarknetFormatDetectionDataset,
        config_name="coco_detection_yolo_format_base_dataset_params_custom",
        train=True,
        dataloader_params=dataloader_params,
        dataset_params=dataset_params,
    )


@register_dataloader(Dataloaders.COCO_DETECTION_YOLO_FORMAT_VAL_CUSTOM)
def coco_detection_yolo_format_val_custom(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader:
    """Build the validation dataloader for a custom YOLO-(Darknet)-format detection dataset.

    :param dataset_params:    Overrides for the dataset recipe params; None keeps the recipe defaults.
    :param dataloader_params: Overrides for the dataloader recipe params; None keeps the recipe defaults.
    :return: A configured torch DataLoader over the validation split.
    """
    return get_data_loader(
        dataset_cls=YoloDarknetFormatDetectionDataset,
        config_name="coco_detection_yolo_format_base_dataset_params_custom",
        train=False,
        dataloader_params=dataloader_params,
        dataset_params=dataset_params,
    )


@register_dataloader(Dataloaders.COCO2017_TRAIN_PPYOLOE)
def coco2017_train_ppyoloe(dataset_params: Dict = None, dataloader_params: Dict = None) -> DataLoader:
Expand Down
102 changes: 102 additions & 0 deletions src/super_gradients/training/transforms/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1079,6 +1079,108 @@ def get_equivalent_preprocessing(self) -> List:
return []


@register_transform(Transforms.DetectionRandomSideCrop)
class DetectionRandomSideCrop(DetectionTransform):
    """Preprocessing transform that crops an image (and its bboxes) along the width, keeping one side.

    The output image keeps either the left or the right side of the input, with a width between
    `min_rel_width` and `max_rel_width` of the original image width.

    Note: It assumes the targets are in (X, Y, X, Y, label) format.
    """

    def __init__(self, min_rel_width: float = 0.3, max_rel_width: float = 0.6, p_side_right: float = 0.5, prob: float = 1.0):
        """
        :param min_rel_width: Minimum relative width of the resulting crop, defaults to 0.3
        :param max_rel_width: Maximum relative width of the resulting crop, defaults to 0.6
        :param p_side_right: Probability of keeping the right side when cropping, defaults to 0.5
        :param prob: Probability of applying the transformation, defaults to 1.0
        :raises AssertionError: If input parameters are not in the correct range
        """
        assert 0 < min_rel_width <= 1, f"`min_rel_width` value must be between 0 (not included) and 1, found {min_rel_width}"
        assert 0 <= max_rel_width <= 1, f"`max_rel_width` value must be between 0 and 1, found {max_rel_width}"
        assert min_rel_width <= max_rel_width, f"`min_rel_width` ({min_rel_width}) must not exceed `max_rel_width` ({max_rel_width})"
        assert 0 <= prob <= 1, f"Probability value must be between 0 and 1, found {prob}"
        assert 0 <= p_side_right <= 1, f"Probability of side value must be between 0 and 1, found {p_side_right}"
        super(DetectionRandomSideCrop, self).__init__()
        self.max_rel_width = max_rel_width
        self.min_rel_width = min_rel_width
        self.p_side_right = p_side_right
        self.p = prob

    def __call__(self, sample: dict) -> dict:
        # Apply the crop with probability `self.p`.
        if random.random() > self.p:
            return sample

        # Keep the right side with probability `p_side_right` (was inverted: `>` kept it with 1 - p_side_right).
        side = "right" if random.random() < self.p_side_right else "left"
        # Relative width of the crop that is KEPT, as documented on the class.
        rel_width = random.uniform(self.min_rel_width, self.max_rel_width)

        image, targets = sample["image"], sample["target"]

        img_width = image.shape[1]
        # Absolute kept width, clamped to [1, img_width - 1] so both the crop and the discarded
        # side are non-empty. (The previous code kept `1 - rel_width` of the image, contradicting
        # the documented `min_rel_width`/`max_rel_width` semantics.)
        kept_width = min(max(int(rel_width * img_width), 1), img_width - 1)
        # x coordinate of the cut: keep columns [abs_x:] for "right", [:abs_x] for "left".
        abs_x = img_width - kept_width if side == "right" else kept_width

        sample["image"] = self._crop_image(image, abs_x, side)

        boxes, kept_indices = self._crop_bboxes(targets[:, :4], abs_x, side)
        targets = targets[kept_indices]
        targets[:, :4] = boxes
        sample["target"] = targets

        # Crop crowd targets with the same cut. (The previous code cropped the stale non-crowd
        # `bboxes` and wrote the non-crowd `targets` back into "crowd_target".)
        if "crowd_target" in sample:
            crowd_targets = sample["crowd_target"]
            crowd_boxes, crowd_kept_indices = self._crop_bboxes(crowd_targets[:, :4], abs_x, side)
            crowd_targets = crowd_targets[crowd_kept_indices]
            crowd_targets[:, :4] = crowd_boxes
            sample["crowd_target"] = crowd_targets

        return sample

    def _crop_image(self, img: np.ndarray, abs_x: int, side: str) -> np.ndarray:
        """Return the cropped image.

        :param img: Numpy array of image
        :param abs_x: Absolute value of the x coordinate to crop
        :param side: Side of the resulting crop. Either "right" or "left"
        :return: Numpy array of cropped image
        """
        return img[:, abs_x:] if side == "right" else img[:, :abs_x]

    def _crop_bboxes(self, bboxes: np.ndarray, abs_x: int, side: str) -> tuple:
        """Return the bboxes that are inside the crop. In the case of intersection, the bbox is cropped.

        :param bboxes: Numpy array of bounding boxes in (X,Y,X,Y) format. Shape (N,4)
        :param abs_x: Absolute value of the x coordinate to crop
        :param side: Side of the resulting crop. Either "right" or "left"
        :return: Tuple of (cropped bounding boxes in (X,Y,X,Y) format, shape (N',4);
                 indices of the kept rows in the input array)
        """
        if side == "right":
            # Keep boxes whose right edge lies inside the kept (right) region.
            keep_mask = bboxes[:, 2] > abs_x
            fixed_bboxes = bboxes[keep_mask].copy()
            # Shift into the new coordinate frame, clipping the left edge at the cut.
            fixed_bboxes[:, 0] = np.maximum(fixed_bboxes[:, 0], abs_x) - abs_x
            fixed_bboxes[:, 2] = fixed_bboxes[:, 2] - abs_x
        else:
            # Keep boxes whose left edge lies inside the kept (left) region.
            keep_mask = bboxes[:, 0] < abs_x
            fixed_bboxes = bboxes[keep_mask].copy()
            # Clip the right edge at the cut; coordinates are already in the new frame.
            fixed_bboxes[:, 2] = np.minimum(fixed_bboxes[:, 2], abs_x)

        kept_indices = np.nonzero(keep_mask)[0]
        return fixed_bboxes.reshape((-1, 4)), kept_indices



def get_aug_params(value: Union[tuple, float], center: float = 0) -> float:
"""
Generates a random value for augmentations as described below
Expand Down