Skip to content
This repository was archived by the owner on Mar 10, 2026. It is now read-only.
Draft
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
9bc256f
chore: initial commit
ariG23498 Aug 4, 2023
a8ad7c4
review comments
ariG23498 Aug 17, 2023
d523a32
Merge branch 'master' into aritra/port-rcnn
ariG23498 Aug 17, 2023
ed3337c
chore: train test step modification
ariG23498 Aug 18, 2023
301bb1d
Merge branch 'master' into aritra/port-rcnn
ariG23498 Aug 28, 2023
005f70d
review nits
ariG23498 Aug 28, 2023
da5a01e
chore: adding test
ariG23498 Sep 1, 2023
ea88f2c
Merge branch 'master' into aritra/port-rcnn
ariG23498 Sep 1, 2023
5c7048f
Merge branch 'master' into aritra/port-rcnn
ariG23498 Sep 7, 2023
ac005b8
chore: reformat compute loss
ariG23498 Sep 7, 2023
613e29f
chore: faster rcnn call and predict work
ariG23498 Sep 15, 2023
dcb648a
resolved conflicts
ariG23498 Sep 16, 2023
5bf2bc9
chore: porting roi align to keras core
ariG23498 Sep 16, 2023
7d6ef6f
chore: port roi sampler to keras core
ariG23498 Sep 16, 2023
f1e3e17
chore: port rpn label encoder to keras core
ariG23498 Sep 16, 2023
6478cbf
chore: adding tests and fix lint
ariG23498 Sep 16, 2023
7741edc
fix: lint
ariG23498 Sep 16, 2023
13a26e6
chore: adding copyright to faster rcnn presets script
ariG23498 Sep 16, 2023
0bc4cfa
Merge branch 'master' into aritra/port-rcnn
ariG23498 Sep 19, 2023
3b42ecc
chore: removing tf imports
ariG23498 Sep 21, 2023
be9178b
fix imports
ariG23498 Sep 27, 2023
c3b0cfa
Merge branch 'master' into aritra/port-rcnn
ariG23498 Nov 2, 2023
54fd49c
Merge branch 'master' into aritra/port-rcnn
ariG23498 Nov 6, 2023
e59d2b4
fix: style
ariG23498 Nov 6, 2023
001162c
chore: making the model functional in init
ariG23498 Nov 7, 2023
4889192
Merge branch 'master' into aritra/port-rcnn
ariG23498 Nov 7, 2023
4da5ff1
Merge branch 'master' into aritra/port-rcnn
ariG23498 Nov 22, 2023
6a51562
Merge branch 'master' into aritra/port-rcnn
ariG23498 Dec 4, 2023
36da548
Merge branch 'master' into aritra/port-rcnn
ariG23498 Dec 6, 2023
711c031
Merge branch 'master' into aritra/port-rcnn
ariG23498 Dec 18, 2023
9aab0e9
chore: adding static image shapes to backbone in tests
ariG23498 Dec 18, 2023
49815d1
fix: parameterised input shape in test
ariG23498 Dec 18, 2023
6061f01
fix: reshape
ariG23498 Dec 18, 2023
ef279a9
fix: format and output dict
ariG23498 Dec 18, 2023
134f897
chore: masking sample weights for box labels -1
ariG23498 Dec 19, 2023
e190e1b
chore: fixing sample weights and decode predictions
ariG23498 Dec 19, 2023
70f205c
Merge branch 'master' into aritra/port-rcnn
ariG23498 Jan 2, 2024
821b7aa
chore: porting roi gen to keras 3 ops
ariG23498 Jan 2, 2024
324f7fc
Merge branch 'master' into aritra/port-rcnn
ariG23498 Jan 10, 2024
9227255
chore: port roi gen to keras 3
ariG23498 Jan 10, 2024
345764f
chore: removing asserts for keras 3
ariG23498 Jan 10, 2024
3a714e7
Merge branch 'master' into aritra/port-rcnn
ariG23498 Feb 28, 2024
9e7eea0
chore: adding faster rcnn to kokoro build script
ariG23498 Feb 28, 2024
af47e3f
chore: changing a bunch of things and keeping it committed for reference
ariG23498 Feb 28, 2024
fd20746
Merge branch 'master' into aritra/port-rcnn
ariG23498 Mar 13, 2024
2f5c0a2
chore: update roi align
ariG23498 Mar 13, 2024
9c85dfc
chore: adding init and compute loss
ariG23498 Mar 14, 2024
e26a8ef
chore: format
ariG23498 Mar 14, 2024
5a1f5a7
chore: demo.py
ariG23498 Mar 14, 2024
7d873f6
Merge branch 'master' into aritra/port-rcnn
ariG23498 Mar 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 102 additions & 80 deletions keras_cv/layers/object_detection/roi_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
from typing import Optional
from typing import Tuple

# TODO (ariG23498): remove tf and correct the type imports
import tensorflow as tf
from tensorflow import keras

from keras_cv import bounding_box
from keras_cv.backend import assert_tf_keras
from keras_cv.backend import keras
from keras_cv.backend import ops


def _feature_bilinear_interpolation(
Expand Down Expand Up @@ -49,7 +51,7 @@ def _feature_bilinear_interpolation(
A 5-D tensor representing feature crop of shape
[batch_size, num_boxes, output_size, output_size, num_filters].
"""
features_shape = tf.shape(features)
features_shape = ops.shape(features)
batch_size, num_boxes, output_size, num_filters = (
features_shape[0],
features_shape[1],
Expand All @@ -58,22 +60,28 @@ def _feature_bilinear_interpolation(
)

output_size = output_size // 2
kernel_y = tf.reshape(kernel_y, [batch_size, num_boxes, output_size * 2, 1])
kernel_x = tf.reshape(kernel_x, [batch_size, num_boxes, 1, output_size * 2])
kernel_y = ops.reshape(
kernel_y, [batch_size, num_boxes, output_size * 2, 1]
)
kernel_x = ops.reshape(
kernel_x, [batch_size, num_boxes, 1, output_size * 2]
)
# Use implicit broadcast to generate the interpolation kernel. The
# multiplier `4` is for avg pooling.
interpolation_kernel = kernel_y * kernel_x * 4

# Interpolate the gathered features with computed interpolation kernels.
features *= tf.cast(
tf.expand_dims(interpolation_kernel, axis=-1), dtype=features.dtype
features *= ops.cast(
ops.expand_dims(interpolation_kernel, axis=-1), dtype=features.dtype
)
features = tf.reshape(
features = ops.reshape(
features,
[batch_size * num_boxes, output_size * 2, output_size * 2, num_filters],
)
features = tf.nn.avg_pool(features, [1, 2, 2, 1], [1, 2, 2, 1], "VALID")
features = tf.reshape(
features = ops.nn.average_pool(
features, [1, 2, 2, 1], [1, 2, 2, 1], "VALID"
)
features = ops.reshape(
features, [batch_size, num_boxes, output_size, output_size, num_filters]
)
return features
Expand Down Expand Up @@ -108,10 +116,10 @@ def _compute_grid_positions(
box_grid_y0y1: Tensor of size [batch_size, boxes, output_size, 2]
box_grid_x0x1: Tensor of size [batch_size, boxes, output_size, 2]
"""
boxes_shape = tf.shape(boxes)
boxes_shape = ops.shape(boxes)
batch_size, num_boxes = boxes_shape[0], boxes_shape[1]
if batch_size is None:
batch_size = tf.shape(boxes)[0]
batch_size = ops.shape(boxes)[0]
box_grid_x = []
box_grid_y = []
for i in range(output_size):
Expand All @@ -121,29 +129,33 @@ def _compute_grid_positions(
box_grid_y.append(
boxes[:, :, 0] + (i + sample_offset) * boxes[:, :, 2] / output_size
)
box_grid_x = tf.stack(box_grid_x, axis=2)
box_grid_y = tf.stack(box_grid_y, axis=2)
box_grid_x = ops.stack(box_grid_x, axis=2)
box_grid_y = ops.stack(box_grid_y, axis=2)

box_grid_y0 = tf.floor(box_grid_y)
box_grid_x0 = tf.floor(box_grid_x)
box_grid_x0 = tf.maximum(tf.cast(0.0, dtype=box_grid_x0.dtype), box_grid_x0)
box_grid_y0 = tf.maximum(tf.cast(0.0, dtype=box_grid_y0.dtype), box_grid_y0)
box_grid_y0 = ops.floor(box_grid_y)
box_grid_x0 = ops.floor(box_grid_x)
box_grid_x0 = ops.maximum(
ops.cast(0.0, dtype=box_grid_x0.dtype), box_grid_x0
)
box_grid_y0 = ops.maximum(
ops.cast(0.0, dtype=box_grid_y0.dtype), box_grid_y0
)

box_grid_x0 = tf.minimum(
box_grid_x0, tf.expand_dims(boundaries[:, :, 1], -1)
box_grid_x0 = ops.minimum(
box_grid_x0, ops.expand_dims(boundaries[:, :, 1], -1)
)
box_grid_x1 = tf.minimum(
box_grid_x0 + 1, tf.expand_dims(boundaries[:, :, 1], -1)
box_grid_x1 = ops.minimum(
box_grid_x0 + 1, ops.expand_dims(boundaries[:, :, 1], -1)
)
box_grid_y0 = tf.minimum(
box_grid_y0, tf.expand_dims(boundaries[:, :, 0], -1)
box_grid_y0 = ops.minimum(
box_grid_y0, ops.expand_dims(boundaries[:, :, 0], -1)
)
box_grid_y1 = tf.minimum(
box_grid_y0 + 1, tf.expand_dims(boundaries[:, :, 0], -1)
box_grid_y1 = ops.minimum(
box_grid_y0 + 1, ops.expand_dims(boundaries[:, :, 0], -1)
)

box_gridx0x1 = tf.stack([box_grid_x0, box_grid_x1], axis=-1)
box_gridy0y1 = tf.stack([box_grid_y0, box_grid_y1], axis=-1)
box_gridx0x1 = ops.stack([box_grid_x0, box_grid_x1], axis=-1)
box_gridy0y1 = ops.stack([box_grid_y0, box_grid_y1], axis=-1)

# The RoIAlign feature f can be computed by bilinear interpolation of four
# neighboring feature points f0, f1, f2, and f3.
Expand All @@ -155,11 +167,11 @@ def _compute_grid_positions(
lx = box_grid_x - box_grid_x0
hy = 1.0 - ly
hx = 1.0 - lx
kernel_y = tf.reshape(
tf.stack([hy, ly], axis=3), [batch_size, num_boxes, output_size, 2, 1]
kernel_y = ops.reshape(
ops.stack([hy, ly], axis=3), [batch_size, num_boxes, output_size, 2, 1]
)
kernel_x = tf.reshape(
tf.stack([hx, lx], axis=3), [batch_size, num_boxes, output_size, 2, 1]
kernel_x = ops.reshape(
ops.stack([hx, lx], axis=3), [batch_size, num_boxes, output_size, 2, 1]
)
return kernel_y, kernel_x, box_gridy0y1, box_gridx0x1

Expand Down Expand Up @@ -194,39 +206,43 @@ def multilevel_crop_and_resize(
[batch_size, num_boxes, output_size, output_size, num_filters].
"""

with tf.name_scope("multilevel_crop_and_resize"):
with keras.backend.name_scope("multilevel_crop_and_resize"):
levels_str = list(features.keys())
# Levels are represented by strings with a prefix "P" to represent
# pyramid levels. The integer level can be obtained by looking at
# the value that follows the "P".
levels = [int(level_str[1:]) for level_str in levels_str]
min_level = min(levels)
max_level = max(levels)
features_shape = tf.shape(features[f"P{min_level}"])
features_shape = ops.shape(features[f"P{min_level}"])
batch_size, max_feature_height, max_feature_width, num_filters = (
features_shape[0],
features_shape[1],
features_shape[2],
features_shape[3],
)

num_boxes = tf.shape(boxes)[1]
num_boxes = ops.shape(boxes)[1]

# Stack feature pyramid into a features_all of shape
# [batch_size, levels, height, width, num_filters].
features_all = []
feature_heights = []
feature_widths = []
for level in range(min_level, max_level + 1):
shape = features[f"P{level}"].get_shape().as_list()
shape = ops.shape(features[f"P{level}"])
feature_heights.append(shape[1])
feature_widths.append(shape[2])
# Concat tensor of [batch_size, height_l * width_l, num_filters] for
# each level.
features_all.append(
tf.reshape(features[f"P{level}"], [batch_size, -1, num_filters])
ops.reshape(
features[f"P{level}"], [batch_size, -1, num_filters]
)
)
features_r2 = tf.reshape(tf.concat(features_all, 1), [-1, num_filters])
features_r2 = ops.reshape(
ops.concatenate(features_all, 1), [-1, num_filters]
)

# Calculate height_l * width_l for each level.
level_dim_sizes = [
Expand All @@ -238,59 +254,63 @@ def multilevel_crop_and_resize(
for i in range(len(feature_widths) - 1):
level_dim_offsets.append(level_dim_offsets[i] + level_dim_sizes[i])
batch_dim_size = level_dim_offsets[-1] + level_dim_sizes[-1]
level_dim_offsets = tf.constant(level_dim_offsets, tf.int32)
height_dim_sizes = tf.constant(feature_widths, tf.int32)
level_dim_offsets = keras.backend.constant(level_dim_offsets, "int32")
height_dim_sizes = keras.backend.constant(feature_widths, "int32")

# Assigns boxes to the right level.
box_width = boxes[:, :, 3] - boxes[:, :, 1]
box_height = boxes[:, :, 2] - boxes[:, :, 0]
areas_sqrt = tf.sqrt(
tf.cast(box_height, tf.float32) * tf.cast(box_width, tf.float32)
areas_sqrt = ops.sqrt(
ops.cast(box_height, "float32") * ops.cast(box_width, "float32")
)

# following the FPN paper to divide by 224.
levels = tf.cast(
tf.math.floordiv(
tf.math.log(tf.math.divide_no_nan(areas_sqrt, 224.0)),
tf.math.log(2.0),
levels = ops.cast(
ops.numpy.floor_divide(
ops.numpy.log(
ops.numpy.divide(areas_sqrt, 224.0)
), # tf.math.divide_no_nan
ops.numpy.log(2.0),
)
+ 4.0,
dtype=tf.int32,
dtype="int32",
)
# Maps levels between [min_level, max_level].
levels = tf.minimum(max_level, tf.maximum(levels, min_level))
levels = ops.minimum(max_level, ops.maximum(levels, min_level))

# Projects box location and sizes to corresponding feature levels.
scale_to_level = tf.cast(
tf.pow(tf.constant(2.0), tf.cast(levels, tf.float32)),
scale_to_level = ops.cast(
ops.numpy.power(
keras.backend.constant(2.0), ops.cast(levels, "float32")
),
dtype=boxes.dtype,
)
boxes /= tf.expand_dims(scale_to_level, axis=2)
boxes /= ops.expand_dims(scale_to_level, axis=2)
box_width /= scale_to_level
box_height /= scale_to_level
boxes = tf.concat(
boxes = ops.concatenate(
[
boxes[:, :, 0:2],
tf.expand_dims(box_height, -1),
tf.expand_dims(box_width, -1),
ops.expand_dims(box_height, -1),
ops.expand_dims(box_width, -1),
],
axis=-1,
)

# Maps levels to [0, max_level-min_level].
levels -= min_level
level_strides = tf.pow([[2.0]], tf.cast(levels, tf.float32))
boundary = tf.cast(
tf.concat(
level_strides = ops.numpy.power([[2.0]], ops.cast(levels, "float32"))
boundary = ops.cast(
ops.concatenate(
[
tf.expand_dims(
[[tf.cast(max_feature_height, tf.float32)]]
ops.expand_dims(
[[ops.cast(max_feature_height, "float32")]]
/ level_strides
- 1,
axis=-1,
),
tf.expand_dims(
[[tf.cast(max_feature_width, tf.float32)]]
ops.expand_dims(
[[ops.cast(max_feature_width, "float32")]]
/ level_strides
- 1,
axis=-1,
Expand All @@ -309,42 +329,44 @@ def multilevel_crop_and_resize(
box_gridx0x1,
) = _compute_grid_positions(boxes, boundary, output_size, sample_offset)

x_indices = tf.cast(
tf.reshape(box_gridx0x1, [batch_size, num_boxes, output_size * 2]),
dtype=tf.int32,
x_indices = ops.cast(
ops.reshape(box_gridx0x1, [batch_size, num_boxes, output_size * 2]),
dtype="int32",
)
y_indices = tf.cast(
tf.reshape(box_gridy0y1, [batch_size, num_boxes, output_size * 2]),
dtype=tf.int32,
y_indices = ops.cast(
ops.reshape(box_gridy0y1, [batch_size, num_boxes, output_size * 2]),
dtype="int32",
)

batch_size_offset = tf.tile(
tf.reshape(
tf.range(batch_size) * batch_dim_size, [batch_size, 1, 1, 1]
batch_size_offset = ops.tile(
ops.reshape(
ops.arange(batch_size) * batch_dim_size, [batch_size, 1, 1, 1]
),
[1, num_boxes, output_size * 2, output_size * 2],
)
# Get level offset for each box. Each box belongs to one level.
levels_offset = tf.tile(
tf.reshape(
tf.gather(level_dim_offsets, levels),
levels_offset = ops.tile(
ops.reshape(
keras.backend.gather(level_dim_offsets, levels),
[batch_size, num_boxes, 1, 1],
),
[1, 1, output_size * 2, output_size * 2],
)
y_indices_offset = tf.tile(
tf.reshape(
y_indices_offset = ops.tile(
ops.reshape(
y_indices
* tf.expand_dims(tf.gather(height_dim_sizes, levels), -1),
* ops.expand_dims(
keras.backend.gather(height_dim_sizes, levels), -1
),
[batch_size, num_boxes, output_size * 2, 1],
),
[1, 1, 1, output_size * 2],
)
x_indices_offset = tf.tile(
tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]),
x_indices_offset = ops.tile(
ops.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]),
[1, 1, output_size * 2, 1],
)
indices = tf.reshape(
indices = ops.reshape(
batch_size_offset
+ levels_offset
+ y_indices_offset
Expand All @@ -354,8 +376,8 @@ def multilevel_crop_and_resize(

# TODO(tanzhenyu): replace tf.gather with tf.gather_nd and try to get
# similar performance.
features_per_box = tf.reshape(
tf.gather(features_r2, indices),
features_per_box = ops.reshape(
keras.backend.gather(features_r2, indices),
[
batch_size,
num_boxes,
Expand Down
3 changes: 2 additions & 1 deletion keras_cv/layers/object_detection/roi_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from keras_cv import bounding_box
from keras_cv.api_export import keras_cv_export
from keras_cv.backend import assert_tf_keras
from keras_cv.backend import ops


@keras_cv_export("keras_cv.layers.ROIGenerator")
Expand Down Expand Up @@ -148,7 +149,7 @@ def per_level_gen(boxes, scores):
# scores can also be [batch_size, num_boxes, 1]
if len(scores_shape) == 3:
scores = tf.squeeze(scores, axis=-1)
_, num_boxes = scores.get_shape().as_list()
num_boxes = ops.shape(boxes)[1]
level_pre_nms_topk = min(num_boxes, pre_nms_topk)
level_post_nms_topk = min(num_boxes, post_nms_topk)
scores, sorted_indices = tf.nn.top_k(
Expand Down
Loading