TLDR-473 add bbox class from dedoc (#13)

Co-authored-by: Nikita Shevtsov <[email protected]>
ispras · Sep 28, 2023 · a71b68b · a71b68b
1 parent dd93431
commit a71b68b
Show file tree

Hide file tree

Showing 14 changed files with 93 additions and 26 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,10 @@
 Changelog
 =========
 
+v0.3.3 (2023-09-28)
+-------------------
+* Update `BBox` class
+
 v0.3.2 (2023-09-25)
 -------------------
 * Add intervals to dependencies versions

diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-0.3.2
+0.3.3
diff --git a/dedocutils/data_structures/bbox.py b/dedocutils/data_structures/bbox.py
@@ -1,12 +1,17 @@
+import math
 from collections import OrderedDict
 from dataclasses import dataclass
 from typing import Dict, Tuple
 
+import numpy as np
+
 
 @dataclass
 class BBox:
     """
-    Box around some page object, the coordinate system starts from top left corner
+    Bounding box around some page object, the coordinate system starts from top left corner.
+    """
+    """
 
     0------------------------------------------------------------------------------------------------> x
     |                                   BBox
@@ -21,10 +26,19 @@ class BBox:
     |
     V y
     """
-    x_top_left: int
-    y_top_left: int
-    width: int
-    height: int
+    def __init__(self, x_top_left: int, y_top_left: int, width: int, height: int) -> None:
+        """
+        All of the following parameters are measured in pixels.
+
+        :param x_top_left: x coordinate of the bbox top left corner
+        :param y_top_left: y coordinate of the bbox top left corner
+        :param width: bounding box width
+        :param height: bounding box height
+        """
+        self.x_top_left = x_top_left
+        self.y_top_left = y_top_left
+        self.width = width
+        self.height = height
 
     @property
     def x_bottom_right(self) -> int:
@@ -34,17 +48,56 @@ def x_bottom_right(self) -> int:
     def y_bottom_right(self) -> int:
         return self.y_top_left + self.height
 
+    @staticmethod
+    def crop_image_by_box(image: np.ndarray, bbox: "BBox") -> np.ndarray:
+        return image[bbox.y_top_left:bbox.y_bottom_right, bbox.x_top_left:bbox.x_bottom_right]
+
+    def rotate_coordinates(self, angle_rotate: float, image_shape: Tuple[int]) -> None:
+        xb, yb = self.x_top_left, self.y_top_left
+        xe, ye = self.x_bottom_right, self.y_bottom_right
+        rad = angle_rotate * math.pi / 180
+
+        xc = image_shape[1] / 2
+        yc = image_shape[0] / 2
+
+        bbox_xb = min((int(float(xb - xc) * math.cos(rad) - float(yb - yc) * math.sin(rad) + xc)), image_shape[1])
+        bbox_yb = min((int(float(yb - yc) * math.cos(rad) + float(xb - xc) * math.sin(rad) + yc)), image_shape[0])
+        bbox_xe = min((int(float(xe - xc) * math.cos(rad) - float(ye - yc) * math.sin(rad) + xc)), image_shape[1])
+        bbox_ye = min((int(float(ye - yc) * math.cos(rad) + float(xe - xc) * math.sin(rad) + yc)), image_shape[0])
+        self.__init__(bbox_xb, bbox_yb, bbox_xe - bbox_xb, bbox_ye - bbox_yb)
+
+    def __str__(self) -> str:
+        return f"BBox(x = {self.x_top_left} y = {self.y_top_left}, w = {self.width}, h = {self.height})"
+
+    def __repr__(self) -> str:
+        return self.__str__()
+
     @property
     def square(self) -> int:
+        """
+        Area of the bbox (height * width).
+        """
         return self.height * self.width
 
     @staticmethod
     def from_two_points(top_left: Tuple[int, int], bottom_right: Tuple[int, int]) -> "BBox":
+        """
+        Make the bounding box from two points.
+
+        :param top_left: (x, y) point of the bbox top left corner
+        :param bottom_right: (x, y) point of the bbox bottom right corner
+        """
         x_top_left, y_top_left = top_left
         x_bottom_right, y_bottom_right = bottom_right
         return BBox(x_top_left=x_top_left, y_top_left=y_top_left, width=x_bottom_right - x_top_left, height=y_bottom_right - y_top_left)
 
     def have_intersection_with_box(self, box: "BBox", threshold: float = 0.3) -> bool:
+        """
+        Check if the current bounding box intersects another one.
+
+        :param box: another bounding box to check intersection with
+        :param threshold: the lowest value of the intersection over union used to get the boolean result
+        """
         # determine the (x, y)-coordinates of the intersection rectangle
         x_min = max(self.x_top_left, box.x_top_left)
         y_min = max(self.y_top_left, box.y_top_left)

diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/models/classification/resnet/pytorch.py b/dedocutils/text_detection/doctr_text_detector/doctr/models/classification/resnet/pytorch.py
@@ -1,11 +1,13 @@
 # noqa
 from typing import Any, Callable, Dict, List, Optional, Tuple
+
 from torch import nn
 from torchvision.models.resnet import BasicBlock
 from torchvision.models.resnet import ResNet as TVResNet
 from torchvision.models.resnet import resnet18 as tv_resnet18
 from torchvision.models.resnet import resnet34 as tv_resnet34
 from torchvision.models.resnet import resnet50 as tv_resnet50
+
 from dedocutils.text_detection.doctr_text_detector.doctr.datasets.vocabs import VOCABS
 from ...utils import conv_sequence_pt, load_pretrained_params
 

diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/models/classification/zoo.py b/dedocutils/text_detection/doctr_text_detector/doctr/models/classification/zoo.py
@@ -1,8 +1,8 @@
 from typing import Any
 
+from .predictor import CropOrientationPredictor
 from .. import classification
 from ..preprocessor import PreProcessor
-from .predictor import CropOrientationPredictor
 
 __all__ = ["crop_orientation_predictor"]
 

diff --git a/..._detection/doctr_text_detector/doctr/models/detection/differentiable_binarization/base.py b/..._detection/doctr_text_detector/doctr/models/detection/differentiable_binarization/base.py
@@ -1,9 +1,10 @@
+from typing import List, Tuple
+
 import cv2
 import numpy as np
 import pyclipper
-
-from typing import List, Tuple
 from shapely.geometry import Polygon
+
 from ..core import DetectionPostProcessor
 
 __all__ = ['DBPostProcessor']

diff --git a/...tection/doctr_text_detector/doctr/models/detection/differentiable_binarization/pytorch.py b/...tection/doctr_text_detector/doctr/models/detection/differentiable_binarization/pytorch.py
@@ -1,14 +1,15 @@
+from typing import Any, Callable, Dict, List, Optional
+
 import numpy as np
 import torch
-
 from torch import nn
-from typing import Any, Callable, Dict, List, Optional
 from torch.nn import functional as F
 from torchvision.models import resnet34, resnet50
 from torchvision.models._utils import IntermediateLayerGetter
 from torchvision.ops.deform_conv import DeformConv2d
-from ...utils import load_pretrained_params
+
 from .base import DBPostProcessor, _DBNet
+from ...utils import load_pretrained_params
 
 __all__ = ['DBNet', 'db_resnet50', 'db_resnet34', 'db_resnet50_rotation']
 

diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/linknet/base.py b/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/linknet/base.py
@@ -1,9 +1,10 @@
+from typing import List, Tuple
+
 import cv2
 import numpy as np
 import pyclipper
-
 from shapely.geometry import Polygon
-from typing import List, Tuple
+
 from dedocutils.text_detection.doctr_text_detector.doctr.models.core import BaseModel
 from ..core import DetectionPostProcessor
 

diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/linknet/pytorch.py b/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/linknet/pytorch.py
@@ -1,13 +1,14 @@
+from typing import Any, Callable, Dict, List, Optional, Tuple
+
 import numpy as np
 import torch
-
 from torch import nn
-from typing import Any, Callable, Dict, List, Optional, Tuple
 from torch.nn import functional as F
 from torchvision.models._utils import IntermediateLayerGetter
+
 from dedocutils.text_detection.doctr_text_detector.doctr.models.classification import resnet18, resnet34, resnet50
-from ...utils import load_pretrained_params
 from .base import LinkNetPostProcessor, _LinkNet
+from ...utils import load_pretrained_params
 
 __all__ = ['LinkNet', 'linknet_resnet18', 'linknet_resnet34', 'linknet_resnet50']
 

diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/zoo.py b/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/zoo.py
@@ -1,8 +1,8 @@
 from typing import Any
 
+from .predictor import DetectionPredictor
 from .. import detection
 from ..preprocessor import PreProcessor
-from .predictor import DetectionPredictor
 
 __all__ = ["detection_predictor"]
 

diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/transforms/functional/pytorch.py b/dedocutils/text_detection/doctr_text_detector/doctr/transforms/functional/pytorch.py
@@ -1,10 +1,11 @@
 # noqa
-import numpy as np
-import torch
-
 from copy import deepcopy
 from typing import Tuple
+
+import numpy as np
+import torch
 from torchvision.transforms import functional as F
+
 from dedocutils.text_detection.doctr_text_detector.doctr.utils.geometry import rotate_abs_geoms
 from .base import create_shadow_mask, crop_boxes
 

diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/transforms/modules/base.py b/dedocutils/text_detection/doctr_text_detector/doctr/transforms/modules/base.py
@@ -1,8 +1,9 @@
 import math
 import random
+from typing import Any, Callable, Dict, List, Tuple
+
 import numpy as np
 
-from typing import Any, Callable, Dict, List, Tuple
 from dedocutils.text_detection.doctr_text_detector.doctr.utils.repr import NestedObject
 from .. import functional as F
 

diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/transforms/modules/pytorch.py b/dedocutils/text_detection/doctr_text_detector/doctr/transforms/modules/pytorch.py
@@ -1,13 +1,14 @@
 # noqa
 import math
+from typing import Any, Dict, Optional, Tuple, Union
+
 import numpy as np
 import torch
-
-from typing import Any, Dict, Optional, Tuple, Union
 from PIL.Image import Image
 from torch.nn.functional import pad
 from torchvision.transforms import functional as F
 from torchvision.transforms import transforms as T
+
 from ..functional.pytorch import random_shadow
 
 __all__ = ['Resize', 'GaussianNoise', 'ChannelShuffle', 'RandomHorizontalFlip', 'RandomShadow']

diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/utils/geometry.py b/dedocutils/text_detection/doctr_text_detector/doctr/utils/geometry.py
@@ -1,8 +1,9 @@
+from math import ceil
+from typing import List, Optional, Tuple, Union
+
 import cv2
 import numpy as np
 
-from math import ceil
-from typing import List, Optional, Tuple, Union
 from .common_types import BoundingBox, Polygon4P
 
 __all__ = ['bbox_to_polygon', 'polygon_to_bbox', 'resolve_enclosing_bbox', 'resolve_enclosing_rbbox',