Skip to content
This repository was archived by the owner on Mar 10, 2026. It is now read-only.
Draft
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
9bc256f
chore: initial commit
ariG23498 Aug 4, 2023
a8ad7c4
review comments
ariG23498 Aug 17, 2023
d523a32
Merge branch 'master' into aritra/port-rcnn
ariG23498 Aug 17, 2023
ed3337c
chore: train test step modification
ariG23498 Aug 18, 2023
301bb1d
Merge branch 'master' into aritra/port-rcnn
ariG23498 Aug 28, 2023
005f70d
review nits
ariG23498 Aug 28, 2023
da5a01e
chore: adding test
ariG23498 Sep 1, 2023
ea88f2c
Merge branch 'master' into aritra/port-rcnn
ariG23498 Sep 1, 2023
5c7048f
Merge branch 'master' into aritra/port-rcnn
ariG23498 Sep 7, 2023
ac005b8
chore: reformat compute loss
ariG23498 Sep 7, 2023
613e29f
chore: faster rcnn call and predict work
ariG23498 Sep 15, 2023
dcb648a
resolved conflicts
ariG23498 Sep 16, 2023
5bf2bc9
chore: porting roi align to keras core
ariG23498 Sep 16, 2023
7d6ef6f
chore: port roi sampler to keras core
ariG23498 Sep 16, 2023
f1e3e17
chore: port rpn label encoder to keras core
ariG23498 Sep 16, 2023
6478cbf
chore: adding tests and fix lint
ariG23498 Sep 16, 2023
7741edc
fix: lint
ariG23498 Sep 16, 2023
13a26e6
chore: adding copyright to faster rcnn presets script
ariG23498 Sep 16, 2023
0bc4cfa
Merge branch 'master' into aritra/port-rcnn
ariG23498 Sep 19, 2023
3b42ecc
chore: removing tf imports
ariG23498 Sep 21, 2023
be9178b
fix imports
ariG23498 Sep 27, 2023
c3b0cfa
Merge branch 'master' into aritra/port-rcnn
ariG23498 Nov 2, 2023
54fd49c
Merge branch 'master' into aritra/port-rcnn
ariG23498 Nov 6, 2023
e59d2b4
fix: style
ariG23498 Nov 6, 2023
001162c
chore: making the model functional in init
ariG23498 Nov 7, 2023
4889192
Merge branch 'master' into aritra/port-rcnn
ariG23498 Nov 7, 2023
4da5ff1
Merge branch 'master' into aritra/port-rcnn
ariG23498 Nov 22, 2023
6a51562
Merge branch 'master' into aritra/port-rcnn
ariG23498 Dec 4, 2023
36da548
Merge branch 'master' into aritra/port-rcnn
ariG23498 Dec 6, 2023
711c031
Merge branch 'master' into aritra/port-rcnn
ariG23498 Dec 18, 2023
9aab0e9
chore: adding static image shapes to backbone in tests
ariG23498 Dec 18, 2023
49815d1
fix: parameterised input shape in test
ariG23498 Dec 18, 2023
6061f01
fix: reshape
ariG23498 Dec 18, 2023
ef279a9
fix: format and output dict
ariG23498 Dec 18, 2023
134f897
chore: masking sample weights for box labels -1
ariG23498 Dec 19, 2023
e190e1b
chore: fixing sample weights and decode predictions
ariG23498 Dec 19, 2023
70f205c
Merge branch 'master' into aritra/port-rcnn
ariG23498 Jan 2, 2024
821b7aa
chore: porting roi gen to keras 3 ops
ariG23498 Jan 2, 2024
324f7fc
Merge branch 'master' into aritra/port-rcnn
ariG23498 Jan 10, 2024
9227255
chore: port roi gen to keras 3
ariG23498 Jan 10, 2024
345764f
chore: removing asserts for keras 3
ariG23498 Jan 10, 2024
3a714e7
Merge branch 'master' into aritra/port-rcnn
ariG23498 Feb 28, 2024
9e7eea0
chore: adding faster rcnn to kokoro build script
ariG23498 Feb 28, 2024
af47e3f
chore: changing a bunch of things and keeping it committed for reference
ariG23498 Feb 28, 2024
fd20746
Merge branch 'master' into aritra/port-rcnn
ariG23498 Mar 13, 2024
2f5c0a2
chore: update roi align
ariG23498 Mar 13, 2024
9c85dfc
chore: adding init and compute loss
ariG23498 Mar 14, 2024
e26a8ef
chore: format
ariG23498 Mar 14, 2024
5a1f5a7
chore: demo.py
ariG23498 Mar 14, 2024
7d873f6
Merge branch 'master' into aritra/port-rcnn
ariG23498 Mar 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 102 additions & 80 deletions keras_cv/layers/object_detection/roi_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
from typing import Optional
from typing import Tuple

# TODO (ariG23498): remove tf and correct the type imports
import tensorflow as tf
from tensorflow import keras

from keras_cv import bounding_box
from keras_cv.backend import assert_tf_keras
from keras_cv.backend import keras
from keras_cv.backend import ops


def _feature_bilinear_interpolation(
Expand Down Expand Up @@ -49,7 +51,7 @@ def _feature_bilinear_interpolation(
A 5-D tensor representing feature crop of shape
[batch_size, num_boxes, output_size, output_size, num_filters].
"""
features_shape = tf.shape(features)
features_shape = ops.shape(features)
batch_size, num_boxes, output_size, num_filters = (
features_shape[0],
features_shape[1],
Expand All @@ -58,22 +60,28 @@ def _feature_bilinear_interpolation(
)

output_size = output_size // 2
kernel_y = tf.reshape(kernel_y, [batch_size, num_boxes, output_size * 2, 1])
kernel_x = tf.reshape(kernel_x, [batch_size, num_boxes, 1, output_size * 2])
kernel_y = ops.reshape(
kernel_y, [batch_size, num_boxes, output_size * 2, 1]
)
kernel_x = ops.reshape(
kernel_x, [batch_size, num_boxes, 1, output_size * 2]
)
# Use implicit broadcast to generate the interpolation kernel. The
# multiplier `4` is for avg pooling.
interpolation_kernel = kernel_y * kernel_x * 4

# Interpolate the gathered features with computed interpolation kernels.
features *= tf.cast(
tf.expand_dims(interpolation_kernel, axis=-1), dtype=features.dtype
features *= ops.cast(
ops.expand_dims(interpolation_kernel, axis=-1), dtype=features.dtype
)
features = tf.reshape(
features = ops.reshape(
features,
[batch_size * num_boxes, output_size * 2, output_size * 2, num_filters],
)
features = tf.nn.avg_pool(features, [1, 2, 2, 1], [1, 2, 2, 1], "VALID")
features = tf.reshape(
features = ops.nn.average_pool(
features, [1, 2, 2, 1], [1, 2, 2, 1], "VALID"
)
features = ops.reshape(
features, [batch_size, num_boxes, output_size, output_size, num_filters]
)
return features
Expand Down Expand Up @@ -108,10 +116,10 @@ def _compute_grid_positions(
box_grid_y0y1: Tensor of size [batch_size, boxes, output_size, 2]
box_grid_x0x1: Tensor of size [batch_size, boxes, output_size, 2]
"""
boxes_shape = tf.shape(boxes)
boxes_shape = ops.shape(boxes)
batch_size, num_boxes = boxes_shape[0], boxes_shape[1]
if batch_size is None:
batch_size = tf.shape(boxes)[0]
batch_size = ops.shape(boxes)[0]
box_grid_x = []
box_grid_y = []
for i in range(output_size):
Expand All @@ -121,29 +129,33 @@ def _compute_grid_positions(
box_grid_y.append(
boxes[:, :, 0] + (i + sample_offset) * boxes[:, :, 2] / output_size
)
box_grid_x = tf.stack(box_grid_x, axis=2)
box_grid_y = tf.stack(box_grid_y, axis=2)
box_grid_x = ops.stack(box_grid_x, axis=2)
box_grid_y = ops.stack(box_grid_y, axis=2)

box_grid_y0 = tf.floor(box_grid_y)
box_grid_x0 = tf.floor(box_grid_x)
box_grid_x0 = tf.maximum(tf.cast(0.0, dtype=box_grid_x0.dtype), box_grid_x0)
box_grid_y0 = tf.maximum(tf.cast(0.0, dtype=box_grid_y0.dtype), box_grid_y0)
box_grid_y0 = ops.floor(box_grid_y)
box_grid_x0 = ops.floor(box_grid_x)
box_grid_x0 = ops.maximum(
ops.cast(0.0, dtype=box_grid_x0.dtype), box_grid_x0
)
box_grid_y0 = ops.maximum(
ops.cast(0.0, dtype=box_grid_y0.dtype), box_grid_y0
)

box_grid_x0 = tf.minimum(
box_grid_x0, tf.expand_dims(boundaries[:, :, 1], -1)
box_grid_x0 = ops.minimum(
box_grid_x0, ops.expand_dims(boundaries[:, :, 1], -1)
)
box_grid_x1 = tf.minimum(
box_grid_x0 + 1, tf.expand_dims(boundaries[:, :, 1], -1)
box_grid_x1 = ops.minimum(
box_grid_x0 + 1, ops.expand_dims(boundaries[:, :, 1], -1)
)
box_grid_y0 = tf.minimum(
box_grid_y0, tf.expand_dims(boundaries[:, :, 0], -1)
box_grid_y0 = ops.minimum(
box_grid_y0, ops.expand_dims(boundaries[:, :, 0], -1)
)
box_grid_y1 = tf.minimum(
box_grid_y0 + 1, tf.expand_dims(boundaries[:, :, 0], -1)
box_grid_y1 = ops.minimum(
box_grid_y0 + 1, ops.expand_dims(boundaries[:, :, 0], -1)
)

box_gridx0x1 = tf.stack([box_grid_x0, box_grid_x1], axis=-1)
box_gridy0y1 = tf.stack([box_grid_y0, box_grid_y1], axis=-1)
box_gridx0x1 = ops.stack([box_grid_x0, box_grid_x1], axis=-1)
box_gridy0y1 = ops.stack([box_grid_y0, box_grid_y1], axis=-1)

# The RoIAlign feature f can be computed by bilinear interpolation of four
# neighboring feature points f0, f1, f2, and f3.
Expand All @@ -155,11 +167,11 @@ def _compute_grid_positions(
lx = box_grid_x - box_grid_x0
hy = 1.0 - ly
hx = 1.0 - lx
kernel_y = tf.reshape(
tf.stack([hy, ly], axis=3), [batch_size, num_boxes, output_size, 2, 1]
kernel_y = ops.reshape(
ops.stack([hy, ly], axis=3), [batch_size, num_boxes, output_size, 2, 1]
)
kernel_x = tf.reshape(
tf.stack([hx, lx], axis=3), [batch_size, num_boxes, output_size, 2, 1]
kernel_x = ops.reshape(
ops.stack([hx, lx], axis=3), [batch_size, num_boxes, output_size, 2, 1]
)
return kernel_y, kernel_x, box_gridy0y1, box_gridx0x1

Expand Down Expand Up @@ -194,39 +206,43 @@ def multilevel_crop_and_resize(
[batch_size, num_boxes, output_size, output_size, num_filters].
"""

with tf.name_scope("multilevel_crop_and_resize"):
with keras.backend.name_scope("multilevel_crop_and_resize"):
levels_str = list(features.keys())
# Levels are represented by strings with a prefix "P" to represent
# pyramid levels. The integer level can be obtained by looking at
# the value that follows the "P".
levels = [int(level_str[1:]) for level_str in levels_str]
min_level = min(levels)
max_level = max(levels)
features_shape = tf.shape(features[f"P{min_level}"])
features_shape = ops.shape(features[f"P{min_level}"])
batch_size, max_feature_height, max_feature_width, num_filters = (
features_shape[0],
features_shape[1],
features_shape[2],
features_shape[3],
)

num_boxes = tf.shape(boxes)[1]
num_boxes = ops.shape(boxes)[1]

# Stack feature pyramid into a features_all of shape
# [batch_size, levels, height, width, num_filters].
features_all = []
feature_heights = []
feature_widths = []
for level in range(min_level, max_level + 1):
shape = features[f"P{level}"].get_shape().as_list()
shape = ops.shape(features[f"P{level}"])
feature_heights.append(shape[1])
feature_widths.append(shape[2])
# Concat tensor of [batch_size, height_l * width_l, num_filters] for
# each level.
features_all.append(
tf.reshape(features[f"P{level}"], [batch_size, -1, num_filters])
ops.reshape(
features[f"P{level}"], [batch_size, -1, num_filters]
)
)
features_r2 = tf.reshape(tf.concat(features_all, 1), [-1, num_filters])
features_r2 = ops.reshape(
ops.concatenate(features_all, 1), [-1, num_filters]
)

# Calculate height_l * width_l for each level.
level_dim_sizes = [
Expand All @@ -238,59 +254,63 @@ def multilevel_crop_and_resize(
for i in range(len(feature_widths) - 1):
level_dim_offsets.append(level_dim_offsets[i] + level_dim_sizes[i])
batch_dim_size = level_dim_offsets[-1] + level_dim_sizes[-1]
level_dim_offsets = tf.constant(level_dim_offsets, tf.int32)
height_dim_sizes = tf.constant(feature_widths, tf.int32)
level_dim_offsets = keras.backend.constant(level_dim_offsets, "int32")
height_dim_sizes = keras.backend.constant(feature_widths, "int32")

# Assigns boxes to the right level.
box_width = boxes[:, :, 3] - boxes[:, :, 1]
box_height = boxes[:, :, 2] - boxes[:, :, 0]
areas_sqrt = tf.sqrt(
tf.cast(box_height, tf.float32) * tf.cast(box_width, tf.float32)
areas_sqrt = ops.sqrt(
ops.cast(box_height, "float32") * ops.cast(box_width, "float32")
)

# following the FPN paper to divide by 224.
levels = tf.cast(
tf.math.floordiv(
tf.math.log(tf.math.divide_no_nan(areas_sqrt, 224.0)),
tf.math.log(2.0),
levels = ops.cast(
ops.numpy.floor_divide(
ops.numpy.log(
ops.numpy.divide(areas_sqrt, 224.0)
), # tf.math.divide_no_nan
ops.numpy.log(2.0),
)
+ 4.0,
dtype=tf.int32,
dtype="int32",
)
# Maps levels between [min_level, max_level].
levels = tf.minimum(max_level, tf.maximum(levels, min_level))
levels = ops.minimum(max_level, ops.maximum(levels, min_level))

# Projects box location and sizes to corresponding feature levels.
scale_to_level = tf.cast(
tf.pow(tf.constant(2.0), tf.cast(levels, tf.float32)),
scale_to_level = ops.cast(
ops.numpy.power(
keras.backend.constant(2.0), ops.cast(levels, "float32")
),
dtype=boxes.dtype,
)
boxes /= tf.expand_dims(scale_to_level, axis=2)
boxes /= ops.expand_dims(scale_to_level, axis=2)
box_width /= scale_to_level
box_height /= scale_to_level
boxes = tf.concat(
boxes = ops.concatenate(
[
boxes[:, :, 0:2],
tf.expand_dims(box_height, -1),
tf.expand_dims(box_width, -1),
ops.expand_dims(box_height, -1),
ops.expand_dims(box_width, -1),
],
axis=-1,
)

# Maps levels to [0, max_level-min_level].
levels -= min_level
level_strides = tf.pow([[2.0]], tf.cast(levels, tf.float32))
boundary = tf.cast(
tf.concat(
level_strides = ops.numpy.power([[2.0]], ops.cast(levels, "float32"))
boundary = ops.cast(
ops.concatenate(
[
tf.expand_dims(
[[tf.cast(max_feature_height, tf.float32)]]
ops.expand_dims(
[[ops.cast(max_feature_height, "float32")]]
/ level_strides
- 1,
axis=-1,
),
tf.expand_dims(
[[tf.cast(max_feature_width, tf.float32)]]
ops.expand_dims(
[[ops.cast(max_feature_width, "float32")]]
/ level_strides
- 1,
axis=-1,
Expand All @@ -309,42 +329,44 @@ def multilevel_crop_and_resize(
box_gridx0x1,
) = _compute_grid_positions(boxes, boundary, output_size, sample_offset)

x_indices = tf.cast(
tf.reshape(box_gridx0x1, [batch_size, num_boxes, output_size * 2]),
dtype=tf.int32,
x_indices = ops.cast(
ops.reshape(box_gridx0x1, [batch_size, num_boxes, output_size * 2]),
dtype="int32",
)
y_indices = tf.cast(
tf.reshape(box_gridy0y1, [batch_size, num_boxes, output_size * 2]),
dtype=tf.int32,
y_indices = ops.cast(
ops.reshape(box_gridy0y1, [batch_size, num_boxes, output_size * 2]),
dtype="int32",
)

batch_size_offset = tf.tile(
tf.reshape(
tf.range(batch_size) * batch_dim_size, [batch_size, 1, 1, 1]
batch_size_offset = ops.tile(
ops.reshape(
ops.arange(batch_size) * batch_dim_size, [batch_size, 1, 1, 1]
),
[1, num_boxes, output_size * 2, output_size * 2],
)
# Get level offset for each box. Each box belongs to one level.
levels_offset = tf.tile(
tf.reshape(
tf.gather(level_dim_offsets, levels),
levels_offset = ops.tile(
ops.reshape(
keras.backend.gather(level_dim_offsets, levels),
[batch_size, num_boxes, 1, 1],
),
[1, 1, output_size * 2, output_size * 2],
)
y_indices_offset = tf.tile(
tf.reshape(
y_indices_offset = ops.tile(
ops.reshape(
y_indices
* tf.expand_dims(tf.gather(height_dim_sizes, levels), -1),
* ops.expand_dims(
keras.backend.gather(height_dim_sizes, levels), -1
),
[batch_size, num_boxes, output_size * 2, 1],
),
[1, 1, 1, output_size * 2],
)
x_indices_offset = tf.tile(
tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]),
x_indices_offset = ops.tile(
ops.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]),
[1, 1, output_size * 2, 1],
)
indices = tf.reshape(
indices = ops.reshape(
batch_size_offset
+ levels_offset
+ y_indices_offset
Expand All @@ -354,8 +376,8 @@ def multilevel_crop_and_resize(

# TODO(tanzhenyu): replace tf.gather with tf.gather_nd and try to get
# similar performance.
features_per_box = tf.reshape(
tf.gather(features_r2, indices),
features_per_box = ops.reshape(
keras.backend.gather(features_r2, indices),
[
batch_size,
num_boxes,
Expand Down
3 changes: 2 additions & 1 deletion keras_cv/layers/object_detection/roi_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from keras_cv import bounding_box
from keras_cv.api_export import keras_cv_export
from keras_cv.backend import assert_tf_keras
from keras_cv.backend import ops


@keras_cv_export("keras_cv.layers.ROIGenerator")
Expand Down Expand Up @@ -148,7 +149,7 @@ def per_level_gen(boxes, scores):
# scores can also be [batch_size, num_boxes, 1]
if len(scores_shape) == 3:
scores = tf.squeeze(scores, axis=-1)
_, num_boxes = scores.get_shape().as_list()
num_boxes = ops.shape(boxes)[1]
level_pre_nms_topk = min(num_boxes, pre_nms_topk)
level_post_nms_topk = min(num_boxes, post_nms_topk)
scores, sorted_indices = tf.nn.top_k(
Expand Down
Loading