-
Notifications
You must be signed in to change notification settings - Fork 7.1k
Rotated bboxes transforms #9084
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
734aed2
9827ab6
87a238c
95ed7cf
a7d07dc
3996daa
3b4100c
e223c6f
36b02dd
57f2452
a15a057
4bde5e5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -560,6 +560,78 @@ def affine_bounding_boxes(bounding_boxes): | |
) | ||
|
||
|
||
def reference_affine_rotated_bounding_boxes_helper(bounding_boxes, *, affine_matrix, new_canvas_size=None, clamp=True):
    """Reference implementation of an affine transform on *rotated* bounding boxes.

    Applies ``affine_matrix`` (a 2x3 numpy array) to every box in ``bounding_boxes``
    by transforming the four corner points explicitly, and returns a new
    ``tv_tensors.BoundingBoxes`` in the same format as the input.

    Args:
        bounding_boxes: input boxes in any rotated format (or XYXYXYXY).
        affine_matrix: 2x3 affine matrix applied to homogeneous corner points.
        new_canvas_size: optional (H, W) of the output canvas; defaults to the input's.
        clamp: if True, clamp to the canvas and cast back to the input dtype;
            if False, keep full float precision so the caller can post-process.
    """
    format = bounding_boxes.format
    canvas_size = new_canvas_size or bounding_boxes.canvas_size

    def affine_rotated_bounding_boxes(bounding_boxes):
        dtype = bounding_boxes.dtype
        device = bounding_boxes.device

        # Go to float before converting to prevent precision loss in case of CXCYWHR -> XYXYXYXY and W or H is 1
        input_xyxyxyxy = F.convert_bounding_box_format(
            bounding_boxes.to(dtype=torch.float64, device="cpu", copy=True),
            old_format=format,
            new_format=tv_tensors.BoundingBoxFormat.XYXYXYXY,
            inplace=True,
        )
        x1, y1, x2, y2, x3, y3, x4, y4 = input_xyxyxyxy.squeeze(0).tolist()

        # Homogeneous coordinates so the 2x3 affine matrix can translate as well.
        points = np.array(
            [
                [x1, y1, 1.0],
                [x2, y2, 1.0],
                [x3, y3, 1.0],
                [x4, y4, 1.0],
            ]
        )
        transformed_points = np.matmul(points, affine_matrix.astype(points.dtype).T)
        # Use torch.tensor with an explicit float64 dtype: the legacy torch.Tensor(...)
        # constructor would silently downcast to float32 and defeat the float64
        # round-trip established above.
        # The corner order is permuted (1, 0, 3, 2) to keep the polygon winding
        # consistent after the transform.
        output = torch.tensor(
            [
                float(transformed_points[1, 0]),
                float(transformed_points[1, 1]),
                float(transformed_points[0, 0]),
                float(transformed_points[0, 1]),
                float(transformed_points[3, 0]),
                float(transformed_points[3, 1]),
                float(transformed_points[2, 0]),
                float(transformed_points[2, 1]),
            ],
            dtype=torch.float64,
        )

        output = F.convert_bounding_box_format(
            output, old_format=tv_tensors.BoundingBoxFormat.XYXYXYXY, new_format=format
        )

        if clamp:
            # It is important to clamp before casting, especially for CXCYWHR format, dtype=int64
            output = F.clamp_bounding_boxes(
                output,
                format=format,
                canvas_size=canvas_size,
            )
        else:
            # We leave the bounding box as float so the caller gets the full precision
            # to perform any additional operation
            dtype = output.dtype

        return output.to(dtype=dtype, device=device)

    return tv_tensors.BoundingBoxes(
        torch.cat(
            [
                affine_rotated_bounding_boxes(b)
                for b in bounding_boxes.reshape(
                    -1, 5 if format != tv_tensors.BoundingBoxFormat.XYXYXYXY else 8
                ).unbind()
            ],
            dim=0,
        ).reshape(bounding_boxes.shape),
        format=format,
        canvas_size=canvas_size,
    )
|
||
|
||
class TestResize: | ||
INPUT_SIZE = (17, 11) | ||
OUTPUT_SIZES = [17, [17], (17,), None, [12, 13], (12, 13)] | ||
|
@@ -1012,7 +1084,7 @@ class TestHorizontalFlip: | |
def test_kernel_image(self, dtype, device): | ||
check_kernel(F.horizontal_flip_image, make_image(dtype=dtype, device=device)) | ||
|
||
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS) | ||
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) | ||
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) | ||
@pytest.mark.parametrize("device", cpu_and_cuda()) | ||
def test_kernel_bounding_boxes(self, format, dtype, device): | ||
|
@@ -1071,17 +1143,22 @@ def test_image_correctness(self, fn): | |
|
||
torch.testing.assert_close(actual, expected) | ||
|
||
def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes: tv_tensors.BoundingBoxes):
    """Expected result of a horizontal flip, computed via the affine reference helpers."""
    # canvas_size is (H, W); flipping maps x -> W - x while leaving y untouched.
    _, width = bounding_boxes.canvas_size
    flip_matrix = np.array(
        [
            [-1, 0, width],
            [0, 1, 0],
        ],
    )

    # Rotated box formats need the rotated-aware reference implementation.
    if tv_tensors.is_rotated_bounding_format(bounding_boxes.format):
        return reference_affine_rotated_bounding_boxes_helper(bounding_boxes, affine_matrix=flip_matrix)
    return reference_affine_bounding_boxes_helper(bounding_boxes, affine_matrix=flip_matrix)
|
||
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS) | ||
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) | ||
@pytest.mark.parametrize( | ||
"fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)] | ||
) | ||
|
@@ -1464,7 +1541,7 @@ class TestVerticalFlip: | |
def test_kernel_image(self, dtype, device): | ||
check_kernel(F.vertical_flip_image, make_image(dtype=dtype, device=device)) | ||
|
||
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS) | ||
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) | ||
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) | ||
@pytest.mark.parametrize("device", cpu_and_cuda()) | ||
def test_kernel_bounding_boxes(self, format, dtype, device): | ||
|
@@ -1521,17 +1598,22 @@ def test_image_correctness(self, fn): | |
|
||
torch.testing.assert_close(actual, expected) | ||
|
||
def _reference_vertical_flip_bounding_boxes(self, bounding_boxes: tv_tensors.BoundingBoxes):
    """Expected result of a vertical flip, computed via the affine reference helpers."""
    # canvas_size is (H, W); flipping maps y -> H - y while leaving x untouched.
    height, _ = bounding_boxes.canvas_size
    flip_matrix = np.array(
        [
            [1, 0, 0],
            [0, -1, height],
        ],
    )

    # Rotated box formats need the rotated-aware reference implementation.
    if tv_tensors.is_rotated_bounding_format(bounding_boxes.format):
        return reference_affine_rotated_bounding_boxes_helper(bounding_boxes, affine_matrix=flip_matrix)
    return reference_affine_bounding_boxes_helper(bounding_boxes, affine_matrix=flip_matrix)
|
||
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS) | ||
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat)) | ||
@pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)]) | ||
def test_bounding_boxes_correctness(self, format, fn): | ||
bounding_boxes = make_bounding_boxes(format=format) | ||
|
Uh oh!
There was an error while loading. Please reload this page.