Skip to content

Commit

Permalink
TLDR-473 add bbox class from dedoc (#13)
Browse files Browse the repository at this point in the history
Co-authored-by: Nikita Shevtsov <[email protected]>
  • Loading branch information
Travvy88 and Nikita Shevtsov authored Sep 28, 2023
1 parent dd93431 commit a71b68b
Show file tree
Hide file tree
Showing 14 changed files with 93 additions and 26 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
Changelog
=========

v0.3.3 (2023-09-28)
-------------------
* Update `BBox` class

v0.3.2 (2023-09-25)
-------------------
* Add intervals to dependencies versions
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.3.2
0.3.3
63 changes: 58 additions & 5 deletions dedocutils/data_structures/bbox.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
import math
from collections import OrderedDict
from dataclasses import dataclass
from typing import Dict, Tuple

import numpy as np


@dataclass
class BBox:
"""
Box around some page object, the coordinate system starts from top left corner
Bounding box around some page object, the coordinate system starts from top left corner.
"""
"""
0------------------------------------------------------------------------------------------------> x
| BBox
Expand All @@ -21,10 +26,19 @@ class BBox:
|
V y
"""
x_top_left: int
y_top_left: int
width: int
height: int
def __init__(self, x_top_left: int, y_top_left: int, width: int, height: int) -> None:
    """
    Create a bounding box from its top left corner and its size (all values are given in pixels).

    :param x_top_left: x coordinate of the bbox top left corner
    :param y_top_left: y coordinate of the bbox top left corner
    :param width: bounding box width
    :param height: bounding box height
    """
    # position of the top left corner
    self.x_top_left, self.y_top_left = x_top_left, y_top_left
    # size of the box
    self.width, self.height = width, height

@property
def x_bottom_right(self) -> int:
Expand All @@ -34,17 +48,56 @@ def x_bottom_right(self) -> int:
def y_bottom_right(self) -> int:
return self.y_top_left + self.height

@staticmethod
def crop_image_by_box(image: np.ndarray, bbox: "BBox") -> np.ndarray:
    """
    Cut the region covered by the bounding box out of the image.

    :param image: image array, indexed as [y, x, ...]
    :param bbox: bounding box whose top left and bottom right corners give the slice limits
    :return: the part of the image between the bbox corners (the bottom right corner is exclusive)
    """
    rows = slice(bbox.y_top_left, bbox.y_bottom_right)
    columns = slice(bbox.x_top_left, bbox.x_bottom_right)
    return image[rows, columns]

def rotate_coordinates(self, angle_rotate: float, image_shape: Tuple[int, ...]) -> None:
    """
    Rotate the bounding box around the image center and update the box in place.

    The top left and bottom right corners are rotated by the given angle around the image center,
    truncated to integers and clamped to the image borders ([0, width] for x, [0, height] for y).
    The box is then rebuilt from the two rotated points, so its width and height are never negative.

    :param angle_rotate: rotation angle in degrees
    :param image_shape: shape of the image, (height, width, ...) - only the first two items are used
    """
    rad = angle_rotate * math.pi / 180
    cos_a, sin_a = math.cos(rad), math.sin(rad)

    max_y, max_x = image_shape[0], image_shape[1]
    xc, yc = max_x / 2, max_y / 2

    def rotate_point(x: int, y: int) -> Tuple[int, int]:
        # rotate one point around the image center and keep it inside the image
        dx, dy = float(x - xc), float(y - yc)
        new_x = int(dx * cos_a - dy * sin_a + xc)
        new_y = int(dy * cos_a + dx * sin_a + yc)
        return max(0, min(new_x, max_x)), max(0, min(new_y, max_y))

    # both corners are computed before any attribute is changed, because the bottom right
    # corner is derived from the top left corner and the size
    x1, y1 = rotate_point(self.x_top_left, self.y_top_left)
    x2, y2 = rotate_point(self.x_bottom_right, self.y_bottom_right)

    # for large angles the rotated corners swap their roles, so they are re-ordered here
    self.x_top_left, self.y_top_left = min(x1, x2), min(y1, y2)
    self.width, self.height = abs(x2 - x1), abs(y2 - y1)

def __str__(self) -> str:
    """
    Short human-readable description of the box: top left corner coordinates, width and height.
    """
    x, y = self.x_top_left, self.y_top_left
    w, h = self.width, self.height
    return f"BBox(x = {x} y = {y}, w = {w}, h = {h})"

def __repr__(self) -> str:
    """
    Representation of the box, the same text as the one produced by `__str__`.
    """
    rendered = self.__str__()
    return rendered

@property
def square(self) -> int:
    """
    Area of the bounding box: its height multiplied by its width.
    """
    h, w = self.height, self.width
    return h * w

@staticmethod
def from_two_points(top_left: Tuple[int, int], bottom_right: Tuple[int, int]) -> "BBox":
    """
    Build a bounding box from its two opposite corners.

    :param top_left: (x, y) point of the bbox top left corner
    :param bottom_right: (x, y) point of the bbox bottom right corner
    :return: bounding box starting at `top_left`, its width and height are the coordinate differences between the corners
    """
    left, top = top_left
    right, bottom = bottom_right
    return BBox(x_top_left=left, y_top_left=top, width=right - left, height=bottom - top)

def have_intersection_with_box(self, box: "BBox", threshold: float = 0.3) -> bool:
"""
Check if the current bounding box has the intersection with another one.
:param box: another bounding box to check intersection with
:param threshold: the lowest value of the intersection over union used to get the boolean result
"""
# determine the (x, y)-coordinates of the intersection rectangle
x_min = max(self.x_top_left, box.x_top_left)
y_min = max(self.y_top_left, box.y_top_left)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# noqa
from typing import Any, Callable, Dict, List, Optional, Tuple

from torch import nn
from torchvision.models.resnet import BasicBlock
from torchvision.models.resnet import ResNet as TVResNet
from torchvision.models.resnet import resnet18 as tv_resnet18
from torchvision.models.resnet import resnet34 as tv_resnet34
from torchvision.models.resnet import resnet50 as tv_resnet50

from dedocutils.text_detection.doctr_text_detector.doctr.datasets.vocabs import VOCABS
from ...utils import conv_sequence_pt, load_pretrained_params

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Any

from .predictor import CropOrientationPredictor
from .. import classification
from ..preprocessor import PreProcessor
from .predictor import CropOrientationPredictor

__all__ = ["crop_orientation_predictor"]

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from typing import List, Tuple

import cv2
import numpy as np
import pyclipper

from typing import List, Tuple
from shapely.geometry import Polygon

from ..core import DetectionPostProcessor

__all__ = ['DBPostProcessor']
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from typing import Any, Callable, Dict, List, Optional

import numpy as np
import torch

from torch import nn
from typing import Any, Callable, Dict, List, Optional
from torch.nn import functional as F
from torchvision.models import resnet34, resnet50
from torchvision.models._utils import IntermediateLayerGetter
from torchvision.ops.deform_conv import DeformConv2d
from ...utils import load_pretrained_params

from .base import DBPostProcessor, _DBNet
from ...utils import load_pretrained_params

__all__ = ['DBNet', 'db_resnet50', 'db_resnet34', 'db_resnet50_rotation']

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from typing import List, Tuple

import cv2
import numpy as np
import pyclipper

from shapely.geometry import Polygon
from typing import List, Tuple

from dedocutils.text_detection.doctr_text_detector.doctr.models.core import BaseModel
from ..core import DetectionPostProcessor

Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from typing import Any, Callable, Dict, List, Optional, Tuple

import numpy as np
import torch

from torch import nn
from typing import Any, Callable, Dict, List, Optional, Tuple
from torch.nn import functional as F
from torchvision.models._utils import IntermediateLayerGetter

from dedocutils.text_detection.doctr_text_detector.doctr.models.classification import resnet18, resnet34, resnet50
from ...utils import load_pretrained_params
from .base import LinkNetPostProcessor, _LinkNet
from ...utils import load_pretrained_params

__all__ = ['LinkNet', 'linknet_resnet18', 'linknet_resnet34', 'linknet_resnet50']

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Any

from .predictor import DetectionPredictor
from .. import detection
from ..preprocessor import PreProcessor
from .predictor import DetectionPredictor

__all__ = ["detection_predictor"]

Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# noqa
import numpy as np
import torch

from copy import deepcopy
from typing import Tuple

import numpy as np
import torch
from torchvision.transforms import functional as F

from dedocutils.text_detection.doctr_text_detector.doctr.utils.geometry import rotate_abs_geoms
from .base import create_shadow_mask, crop_boxes

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import math
import random
from typing import Any, Callable, Dict, List, Tuple

import numpy as np

from typing import Any, Callable, Dict, List, Tuple
from dedocutils.text_detection.doctr_text_detector.doctr.utils.repr import NestedObject
from .. import functional as F

Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# noqa
import math
from typing import Any, Dict, Optional, Tuple, Union

import numpy as np
import torch

from typing import Any, Dict, Optional, Tuple, Union
from PIL.Image import Image
from torch.nn.functional import pad
from torchvision.transforms import functional as F
from torchvision.transforms import transforms as T

from ..functional.pytorch import random_shadow

__all__ = ['Resize', 'GaussianNoise', 'ChannelShuffle', 'RandomHorizontalFlip', 'RandomShadow']
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from math import ceil
from typing import List, Optional, Tuple, Union

import cv2
import numpy as np

from math import ceil
from typing import List, Optional, Tuple, Union
from .common_types import BoundingBox, Polygon4P

__all__ = ['bbox_to_polygon', 'polygon_to_bbox', 'resolve_enclosing_bbox', 'resolve_enclosing_rbbox',
Expand Down

0 comments on commit a71b68b

Please sign in to comment.