From a71b68b1bd572aeba6f20b6b8c9889562418f32e Mon Sep 17 00:00:00 2001 From: Nikita Shevtsov <61932814+Travvy88@users.noreply.github.com> Date: Thu, 28 Sep 2023 12:32:40 +0300 Subject: [PATCH] TLDR-473 add bbox class from dedoc (#13) Co-authored-by: Nikita Shevtsov --- CHANGELOG.md | 4 ++ VERSION | 2 +- dedocutils/data_structures/bbox.py | 63 +++++++++++++++++-- .../models/classification/resnet/pytorch.py | 2 + .../doctr/models/classification/zoo.py | 2 +- .../differentiable_binarization/base.py | 5 +- .../differentiable_binarization/pytorch.py | 7 ++- .../doctr/models/detection/linknet/base.py | 5 +- .../doctr/models/detection/linknet/pytorch.py | 7 ++- .../doctr/models/detection/zoo.py | 2 +- .../doctr/transforms/functional/pytorch.py | 7 ++- .../doctr/transforms/modules/base.py | 3 +- .../doctr/transforms/modules/pytorch.py | 5 +- .../doctr/utils/geometry.py | 5 +- 14 files changed, 93 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e29bf68..899d4b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ Changelog ========= +v0.3.3 (2023-09-28) +------------------- +* Update `BBox` class + v0.3.2 (2023-09-25) ------------------- * Add intervals to dependencies versions diff --git a/VERSION b/VERSION index 9fc80f9..87a0871 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.3.2 \ No newline at end of file +0.3.3 \ No newline at end of file diff --git a/dedocutils/data_structures/bbox.py b/dedocutils/data_structures/bbox.py index 4a3e62e..bf69090 100644 --- a/dedocutils/data_structures/bbox.py +++ b/dedocutils/data_structures/bbox.py @@ -1,12 +1,17 @@ +import math from collections import OrderedDict from dataclasses import dataclass from typing import Dict, Tuple +import numpy as np + @dataclass class BBox: """ - Box around some page object, the coordinate system starts from top left corner + Bounding box around some page object, the coordinate system starts from top left corner. 
+ """ + """ 0------------------------------------------------------------------------------------------------> x | BBox @@ -21,10 +26,19 @@ class BBox: | V y """ - x_top_left: int - y_top_left: int - width: int - height: int + def __init__(self, x_top_left: int, y_top_left: int, width: int, height: int) -> None: + """ + The following parameters should have values of pixels number. + + :param x_top_left: x coordinate of the bbox top left corner + :param y_top_left: y coordinate of the bbox top left corner + :param width: bounding box width + :param height: bounding box height + """ + self.x_top_left = x_top_left + self.y_top_left = y_top_left + self.width = width + self.height = height @property def x_bottom_right(self) -> int: @@ -34,17 +48,56 @@ def x_bottom_right(self) -> int: def y_bottom_right(self) -> int: return self.y_top_left + self.height + @staticmethod + def crop_image_by_box(image: np.ndarray, bbox: "BBox") -> np.ndarray: + return image[bbox.y_top_left:bbox.y_bottom_right, bbox.x_top_left:bbox.x_bottom_right] + + def rotate_coordinates(self, angle_rotate: float, image_shape: Tuple[int]) -> None: + xb, yb = self.x_top_left, self.y_top_left + xe, ye = self.x_bottom_right, self.y_bottom_right + rad = angle_rotate * math.pi / 180 + + xc = image_shape[1] / 2 + yc = image_shape[0] / 2 + + bbox_xb = min((int(float(xb - xc) * math.cos(rad) - float(yb - yc) * math.sin(rad) + xc)), image_shape[1]) + bbox_yb = min((int(float(yb - yc) * math.cos(rad) + float(xb - xc) * math.sin(rad) + yc)), image_shape[0]) + bbox_xe = min((int(float(xe - xc) * math.cos(rad) - float(ye - yc) * math.sin(rad) + xc)), image_shape[1]) + bbox_ye = min((int(float(ye - yc) * math.cos(rad) + float(xe - xc) * math.sin(rad) + yc)), image_shape[0]) + self.__init__(bbox_xb, bbox_yb, bbox_xe - bbox_xb, bbox_ye - bbox_yb) + + def __str__(self) -> str: + return f"BBox(x = {self.x_top_left} y = {self.y_top_left}, w = {self.width}, h = {self.height})" + + def __repr__(self) -> str: + return 
self.__str__() + @property def square(self) -> int: + """ + Area of the bbox. + """ return self.height * self.width @staticmethod def from_two_points(top_left: Tuple[int, int], bottom_right: Tuple[int, int]) -> "BBox": + """ + Make the bounding box from two points. + + :param top_left: (x, y) point of the bbox top left corner + :param bottom_right: (x, y) point of the bbox bottom right corner + """ x_top_left, y_top_left = top_left x_bottom_right, y_bottom_right = bottom_right return BBox(x_top_left=x_top_left, y_top_left=y_top_left, width=x_bottom_right - x_top_left, height=y_bottom_right - y_top_left) def have_intersection_with_box(self, box: "BBox", threshold: float = 0.3) -> bool: + """ + Check if the current bounding box has the intersection with another one. + + :param box: another bounding box to check intersection with + :param threshold: the lowest value of the intersection over union used to get the boolean result + """ # determine the (x, y)-coordinates of the intersection rectangle x_min = max(self.x_top_left, box.x_top_left) y_min = max(self.y_top_left, box.y_top_left) diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/models/classification/resnet/pytorch.py b/dedocutils/text_detection/doctr_text_detector/doctr/models/classification/resnet/pytorch.py index 6ccd014..cb6b262 100644 --- a/dedocutils/text_detection/doctr_text_detector/doctr/models/classification/resnet/pytorch.py +++ b/dedocutils/text_detection/doctr_text_detector/doctr/models/classification/resnet/pytorch.py @@ -1,11 +1,13 @@ # noqa from typing import Any, Callable, Dict, List, Optional, Tuple + from torch import nn from torchvision.models.resnet import BasicBlock from torchvision.models.resnet import ResNet as TVResNet from torchvision.models.resnet import resnet18 as tv_resnet18 from torchvision.models.resnet import resnet34 as tv_resnet34 from torchvision.models.resnet import resnet50 as tv_resnet50 + from dedocutils.text_detection.doctr_text_detector.doctr.datasets.vocabs 
import VOCABS from ...utils import conv_sequence_pt, load_pretrained_params diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/models/classification/zoo.py b/dedocutils/text_detection/doctr_text_detector/doctr/models/classification/zoo.py index e207830..12562b7 100644 --- a/dedocutils/text_detection/doctr_text_detector/doctr/models/classification/zoo.py +++ b/dedocutils/text_detection/doctr_text_detector/doctr/models/classification/zoo.py @@ -1,8 +1,8 @@ from typing import Any +from .predictor import CropOrientationPredictor from .. import classification from ..preprocessor import PreProcessor -from .predictor import CropOrientationPredictor __all__ = ["crop_orientation_predictor"] diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/differentiable_binarization/base.py b/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/differentiable_binarization/base.py index 8c0c2b7..b3d2a41 100644 --- a/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/differentiable_binarization/base.py +++ b/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/differentiable_binarization/base.py @@ -1,9 +1,10 @@ +from typing import List, Tuple + import cv2 import numpy as np import pyclipper - -from typing import List, Tuple from shapely.geometry import Polygon + from ..core import DetectionPostProcessor __all__ = ['DBPostProcessor'] diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/differentiable_binarization/pytorch.py b/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/differentiable_binarization/pytorch.py index 83e35aa..946009f 100644 --- a/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/differentiable_binarization/pytorch.py +++ b/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/differentiable_binarization/pytorch.py @@ -1,14 +1,15 @@ +from typing import Any, Callable, Dict, List, Optional + import 
numpy as np import torch - from torch import nn -from typing import Any, Callable, Dict, List, Optional from torch.nn import functional as F from torchvision.models import resnet34, resnet50 from torchvision.models._utils import IntermediateLayerGetter from torchvision.ops.deform_conv import DeformConv2d -from ...utils import load_pretrained_params + from .base import DBPostProcessor, _DBNet +from ...utils import load_pretrained_params __all__ = ['DBNet', 'db_resnet50', 'db_resnet34', 'db_resnet50_rotation'] diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/linknet/base.py b/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/linknet/base.py index 8400251..f913758 100644 --- a/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/linknet/base.py +++ b/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/linknet/base.py @@ -1,9 +1,10 @@ +from typing import List, Tuple + import cv2 import numpy as np import pyclipper - from shapely.geometry import Polygon -from typing import List, Tuple + from dedocutils.text_detection.doctr_text_detector.doctr.models.core import BaseModel from ..core import DetectionPostProcessor diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/linknet/pytorch.py b/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/linknet/pytorch.py index 49a1b5c..ac97140 100644 --- a/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/linknet/pytorch.py +++ b/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/linknet/pytorch.py @@ -1,13 +1,14 @@ +from typing import Any, Callable, Dict, List, Optional, Tuple + import numpy as np import torch - from torch import nn -from typing import Any, Callable, Dict, List, Optional, Tuple from torch.nn import functional as F from torchvision.models._utils import IntermediateLayerGetter + from dedocutils.text_detection.doctr_text_detector.doctr.models.classification 
import resnet18, resnet34, resnet50 -from ...utils import load_pretrained_params from .base import LinkNetPostProcessor, _LinkNet +from ...utils import load_pretrained_params __all__ = ['LinkNet', 'linknet_resnet18', 'linknet_resnet34', 'linknet_resnet50'] diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/zoo.py b/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/zoo.py index c3abd8d..95d1424 100644 --- a/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/zoo.py +++ b/dedocutils/text_detection/doctr_text_detector/doctr/models/detection/zoo.py @@ -1,8 +1,8 @@ from typing import Any +from .predictor import DetectionPredictor from .. import detection from ..preprocessor import PreProcessor -from .predictor import DetectionPredictor __all__ = ["detection_predictor"] diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/transforms/functional/pytorch.py b/dedocutils/text_detection/doctr_text_detector/doctr/transforms/functional/pytorch.py index 7c36f23..70e1de8 100644 --- a/dedocutils/text_detection/doctr_text_detector/doctr/transforms/functional/pytorch.py +++ b/dedocutils/text_detection/doctr_text_detector/doctr/transforms/functional/pytorch.py @@ -1,10 +1,11 @@ # noqa -import numpy as np -import torch - from copy import deepcopy from typing import Tuple + +import numpy as np +import torch from torchvision.transforms import functional as F + from dedocutils.text_detection.doctr_text_detector.doctr.utils.geometry import rotate_abs_geoms from .base import create_shadow_mask, crop_boxes diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/transforms/modules/base.py b/dedocutils/text_detection/doctr_text_detector/doctr/transforms/modules/base.py index 3ec260e..3d410c2 100644 --- a/dedocutils/text_detection/doctr_text_detector/doctr/transforms/modules/base.py +++ b/dedocutils/text_detection/doctr_text_detector/doctr/transforms/modules/base.py @@ -1,8 +1,9 @@ import math import random 
+from typing import Any, Callable, Dict, List, Tuple + import numpy as np -from typing import Any, Callable, Dict, List, Tuple from dedocutils.text_detection.doctr_text_detector.doctr.utils.repr import NestedObject from .. import functional as F diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/transforms/modules/pytorch.py b/dedocutils/text_detection/doctr_text_detector/doctr/transforms/modules/pytorch.py index 835562c..dbc1c75 100644 --- a/dedocutils/text_detection/doctr_text_detector/doctr/transforms/modules/pytorch.py +++ b/dedocutils/text_detection/doctr_text_detector/doctr/transforms/modules/pytorch.py @@ -1,13 +1,14 @@ # noqa import math +from typing import Any, Dict, Optional, Tuple, Union + import numpy as np import torch - -from typing import Any, Dict, Optional, Tuple, Union from PIL.Image import Image from torch.nn.functional import pad from torchvision.transforms import functional as F from torchvision.transforms import transforms as T + from ..functional.pytorch import random_shadow __all__ = ['Resize', 'GaussianNoise', 'ChannelShuffle', 'RandomHorizontalFlip', 'RandomShadow'] diff --git a/dedocutils/text_detection/doctr_text_detector/doctr/utils/geometry.py b/dedocutils/text_detection/doctr_text_detector/doctr/utils/geometry.py index 04c41ea..a2d5a7b 100644 --- a/dedocutils/text_detection/doctr_text_detector/doctr/utils/geometry.py +++ b/dedocutils/text_detection/doctr_text_detector/doctr/utils/geometry.py @@ -1,8 +1,9 @@ +from math import ceil +from typing import List, Optional, Tuple, Union + import cv2 import numpy as np -from math import ceil -from typing import List, Optional, Tuple, Union from .common_types import BoundingBox, Polygon4P __all__ = ['bbox_to_polygon', 'polygon_to_bbox', 'resolve_enclosing_bbox', 'resolve_enclosing_rbbox',