Skip to content

Commit

Permalink
[RetinaNet] Add FPN, RetinaNet label encoder as part of phase 1 (#1885)
Browse files Browse the repository at this point in the history
* Rebased phase 1 changes

* nit
  • Loading branch information
sineeli committed Sep 27, 2024
1 parent 6ea5a7b commit 1ffc0d1
Show file tree
Hide file tree
Showing 9 changed files with 1,096 additions and 23 deletions.
44 changes: 28 additions & 16 deletions keras_hub/src/models/retinanet/anchor_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,29 +24,31 @@ class AnchorGenerator(keras.layers.Layer):
for larger objects.
Args:
bounding_box_format (str): The format of the bounding boxes
bounding_box_format: str. The format of the bounding boxes
to be generated. Expected to be a string like 'xyxy', 'xywh', etc.
min_level (int): Minimum level of the output feature pyramid.
max_level (int): Maximum level of the output feature pyramid.
num_scales (int): Number of intermediate scales added on each level.
min_level: int. Minimum level of the output feature pyramid.
max_level: int. Maximum level of the output feature pyramid.
num_scales: int. Number of intermediate scales added on each level.
For example, num_scales=2 adds one additional intermediate anchor
scale [2^0, 2^0.5] on each level.
aspect_ratios (list of float): Aspect ratios of anchors added on
aspect_ratios: List[float]. Aspect ratios of anchors added on
each level. Each number indicates the ratio of width to height.
anchor_size (float): Scale of size of the base anchor relative to the
anchor_size: float. Scale of size of the base anchor relative to the
feature stride 2^level.
Call arguments:
images (Optional[Tensor]): An image tensor with shape `[B, H, W, C]` or
`[H, W, C]`. If provided, its shape will be used to determine anchor
inputs: An image tensor with shape `[B, H, W, C]` or
`[H, W, C]`. Its shape will be used to determine anchor
sizes.
Returns:
Dict: A dictionary mapping feature levels
(e.g., 'P3', 'P4', etc.) to anchor boxes. Each entry contains a tensor
of shape `(H/stride * W/stride * num_anchors_per_location, 4)`,
where H and W are the height and width of the image, stride is 2^level,
and num_anchors_per_location is `num_scales * len(aspect_ratios)`.
(e.g., 'P3', 'P4', etc.) to anchor boxes. Each entry contains a
tensor of shape
`(H/stride * W/stride * num_anchors_per_location, 4)`,
where H and W are the height and width of the image,
stride is 2^level, and num_anchors_per_location is
`num_scales * len(aspect_ratios)`.
Example:
```python
Expand Down Expand Up @@ -81,8 +83,8 @@ def __init__(
self.anchor_size = anchor_size
self.built = True

def call(self, images):
images_shape = ops.shape(images)
def call(self, inputs):
images_shape = ops.shape(inputs)
if len(images_shape) == 4:
image_shape = images_shape[1:-1]
else:
Expand Down Expand Up @@ -147,8 +149,18 @@ def call(self, images):

def compute_output_shape(self, input_shape):
    """Compute the static output shape of the generated anchors.

    Args:
        input_shape: Shape tuple of the input image tensor, either
            `[B, H, W, C]` or `[H, W, C]`.

    Returns:
        Dict mapping pyramid level names (e.g. `"P3"`) to shape tuples
        `(num_anchors, 4)`, where `num_anchors` is
        `(H // 2**level) * (W // 2**level) * anchors_per_location`.
        If either spatial dimension is unknown (`None`), the anchor
        count is reported as `None`.
    """
    multilevel_boxes_shape = {}
    if len(input_shape) == 4:
        # Batched input: strip batch and channel dims.
        image_height, image_width = input_shape[1:-1]
    else:
        # Unbatched input: strip channel dim only.
        image_height, image_width = input_shape[:-1]

    for level in range(self.min_level, self.max_level + 1):
        if image_height is None or image_width is None:
            # Dynamic spatial dims: the anchor count cannot be known
            # statically, so report it as unknown instead of raising.
            multilevel_boxes_shape[f"P{level}"] = (None, 4)
        else:
            stride = 2**level
            multilevel_boxes_shape[f"P{level}"] = (
                (image_height // stride)
                * (image_width // stride)
                * self.anchors_per_location,
                4,
            )
    return multilevel_boxes_shape

@property
Expand Down
29 changes: 28 additions & 1 deletion keras_hub/src/models/retinanet/anchor_generator_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
from absl.testing import parameterized
from keras import ops

Expand All @@ -7,6 +8,32 @@


class AnchorGeneratorTest(TestCase):
def test_layer_behaviors(self):
    """Smoke-test AnchorGenerator through the shared layer-test harness."""
    batch_image_shape = (8, 128, 128, 3)
    # 3 scales x 3 aspect ratios -> 9 anchors per spatial location.
    layer_kwargs = dict(
        bounding_box_format="xyxy",
        min_level=3,
        max_level=7,
        num_scales=3,
        aspect_ratios=[0.5, 1.0, 2.0],
        anchor_size=8,
    )
    # Per level: (128 // 2**level)**2 locations * 9 anchors, 4 coords each.
    per_level_shapes = {
        "P3": (2304, 4),
        "P4": (576, 4),
        "P5": (144, 4),
        "P6": (36, 4),
        "P7": (9, 4),
    }
    self.run_layer_test(
        cls=AnchorGenerator,
        init_kwargs=layer_kwargs,
        input_data=np.random.uniform(size=batch_image_shape),
        expected_output_shape=per_level_shapes,
        expected_num_trainable_weights=0,
        expected_num_non_trainable_weights=0,
        run_training_check=False,
        run_precision_checks=False,
    )

@parameterized.parameters(
# Single scale anchor
("yxyx", 5, 5, 1, [1.0], 2.0, [64, 64])
Expand Down Expand Up @@ -86,7 +113,7 @@ def test_anchor_generator(
anchor_size,
)
images = ops.ones(shape=(1, image_shape[0], image_shape[1], 3))
multilevel_boxes = anchor_generator(images=images)
multilevel_boxes = anchor_generator(images)
for key in expected_boxes:
expected_boxes[key] = ops.convert_to_tensor(expected_boxes[key])
expected_boxes[key] = convert_format(
Expand Down
Loading

0 comments on commit 1ffc0d1

Please sign in to comment.