-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
temp commit #1580
Closed
temp commit #1580
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -1,18 +1,37 @@ | ||||||
import random | ||||||
import types | ||||||
from typing import Any, Callable, Dict, Generator, Iterable, Iterator, List, Optional, Sequence, Tuple, Union | ||||||
from typing import ( | ||||||
Any, | ||||||
Callable, | ||||||
Dict, | ||||||
Generator, | ||||||
Iterable, | ||||||
Iterator, | ||||||
List, | ||||||
Literal, | ||||||
Optional, | ||||||
Sequence, | ||||||
Tuple, | ||||||
Union, | ||||||
cast, | ||||||
) | ||||||
from warnings import warn | ||||||
|
||||||
import numpy as np | ||||||
|
||||||
from albumentations.augmentations.crops.transforms import RandomSizedBBoxSafeCrop | ||||||
from albumentations.augmentations.functional import split_uniform_grid | ||||||
from albumentations.augmentations.geometric.resize import LongestMaxSize, Resize, SmallestMaxSize | ||||||
from albumentations.augmentations.geometric.transforms import PadIfNeeded | ||||||
from albumentations.augmentations.utils import is_grayscale_image | ||||||
from albumentations.core.composition import Compose | ||||||
from albumentations.core.transforms_interface import ReferenceBasedTransform | ||||||
from albumentations.core.types import BoxType, KeypointType, ReferenceImage, Targets | ||||||
from albumentations.random_utils import beta | ||||||
from albumentations.random_utils import beta, choice, shuffle | ||||||
|
||||||
from .functional import mix_arrays | ||||||
|
||||||
__all__ = ["MixUp"] | ||||||
__all__ = ["MixUp", "Mosaic"] | ||||||
|
||||||
|
||||||
class MixUp(ReferenceBasedTransform): | ||||||
|
@@ -28,10 +47,10 @@ class MixUp(ReferenceBasedTransform): | |||||
In International Conference on Learning Representations. https://arxiv.org/abs/1710.09412 | ||||||
|
||||||
Args: | ||||||
reference_data (Optional[Union[Generator[ReferenceImage, None, None], Sequence[Any]]]): | ||||||
reference_data (Optional[Union[Generator[Any, None, None], Sequence[Any]]]): | ||||||
A sequence or generator of dictionaries containing the reference data for mixing | ||||||
If None or an empty sequence is provided, no operation is performed and a warning is issued. | ||||||
read_fn (Callable[[ReferenceImage], Dict[str, Any]]): | ||||||
read_fn (Callable[[Any], ReferenceImage]): | ||||||
A function to process items from reference_data. It should accept items from reference_data | ||||||
and return a dictionary containing processed data: | ||||||
- The returned dictionary must include an 'image' key with a numpy array value. | ||||||
|
@@ -109,8 +128,8 @@ class MixUp(ReferenceBasedTransform): | |||||
|
||||||
def __init__( | ||||||
self, | ||||||
reference_data: Optional[Union[Generator[ReferenceImage, None, None], Sequence[Any]]] = None, | ||||||
read_fn: Callable[[ReferenceImage], Any] = lambda x: {"image": x, "mask": None, "class_label": None}, | ||||||
reference_data: Optional[Union[Generator[Any, None, None], Sequence[Any]]] = None, | ||||||
read_fn: Callable[[Any], ReferenceImage] = lambda x: {"image": x, "mask": None, "global_label": None}, | ||||||
alpha: float = 0.4, | ||||||
mix_coef_return_name: str = "mix_coef", | ||||||
always_apply: bool = False, | ||||||
|
@@ -174,7 +193,7 @@ def apply_to_keypoints( | |||||
def get_transform_init_args_names(self) -> Tuple[str, ...]: | ||||||
return "reference_data", "alpha" | ||||||
|
||||||
def get_params(self) -> Dict[str, Union[None, float, Dict[str, Any]]]: | ||||||
def get_params(self) -> Dict[str, Any]: | ||||||
mix_data = None | ||||||
# Check if reference_data is not empty and is a sequence (list, tuple, np.array) | ||||||
if isinstance(self.reference_data, Sequence) and not isinstance(self.reference_data, (str, bytes)): | ||||||
|
@@ -206,3 +225,228 @@ def apply_with_params(self, params: Dict[str, Any], *args: Any, **kwargs: Any) - | |||||
if self.mix_coef_return_name: | ||||||
res[self.mix_coef_return_name] = params["mix_coef"] | ||||||
return res | ||||||
|
||||||
|
||||||
class Mosaic(ReferenceBasedTransform): | ||||||
"""Performs Mosaic data augmentation, combining multiple images into a single image for enhanced model training. | ||||||
|
||||||
This transformation creates a composite image from multiple source images arranged in a grid, which can be uniform | ||||||
or random based on the `split_mode`. The mosaic augmentation introduces variations in context, scale, and object | ||||||
combinations, beneficial for object detection models. | ||||||
|
||||||
Args: | ||||||
reference_data (Optional[Union[Generator[ReferenceImage, None, None], Sequence[Any]]]): | ||||||
A sequence or generator of dictionaries with the reference data for the mosaic. | ||||||
If None or an empty sequence is provided, no operation is performed. | ||||||
read_fn (Callable[[ReferenceImage], Dict[str, Any]]): | ||||||
A function to process items from `reference_data`. It must return a dictionary containing 'image', | ||||||
and optionally 'mask' and 'global_label', each associated with numpy array values. | ||||||
grid_size (Tuple[int, int]): | ||||||
The size (rows, columns) of the grid to arrange images in the mosaic. Defaults to (3, 3). | ||||||
split_mode (str): | ||||||
Determines how the images are split and arranged in the mosaic. Can be 'uniform' for equal-sized tiles, | ||||||
or 'random' for randomly sized tiles. Defaults to 'uniform'. | ||||||
preprocessing_mode (str): resize, longest_max_size_pad, smallest_max_size_crop, random_sized_bbox_safe_crop, | ||||||
p (float): | ||||||
The probability of applying the transformation. Defaults to 0.5. | ||||||
|
||||||
Targets: | ||||||
image, mask, global_label | ||||||
|
||||||
Image types: | ||||||
uint8, float32 | ||||||
|
||||||
Raises: | ||||||
- ValueError: For invalid `grid_size` or `split_mode` values. | ||||||
- NotImplementedError: If applied to bounding boxes or keypoints. | ||||||
""" | ||||||
|
||||||
_targets = (Targets.IMAGE, Targets.MASK, Targets.BBOXES, Targets.GLOBAL_LABEL) | ||||||
|
||||||
def __init__( | ||||||
self, | ||||||
reference_data: Optional[Union[Generator[ReferenceImage, None, None], Sequence[Any]]] = None, | ||||||
read_fn: Callable[[Any], ReferenceImage] = lambda x: { | ||||||
"image": x, | ||||||
"mask": None, | ||||||
"global_label": None, | ||||||
}, | ||||||
grid_size: Tuple[int, int] = (3, 3), | ||||||
split_mode: str = "uniform", | ||||||
preprocessing_mode: str = "resize", | ||||||
target_size: Tuple[int, int] = (1024, 1024), | ||||||
always_apply: bool = False, | ||||||
p: float = 0.5, | ||||||
): | ||||||
super().__init__(always_apply, p) | ||||||
self.grid_size = grid_size | ||||||
self.split_mode = split_mode | ||||||
self.target_size = target_size | ||||||
|
||||||
if any(x <= 0 for x in self.grid_size): | ||||||
msg = "grid_size must contain positive integers." | ||||||
raise ValueError(msg) | ||||||
if split_mode not in ["uniform", "random"]: | ||||||
msg = "split_mode must be 'uniform' or 'random'." | ||||||
raise ValueError(msg) | ||||||
|
||||||
self.read_fn = read_fn | ||||||
|
||||||
if reference_data is None: | ||||||
warn("No reference data provided for Mosaic. This transform will act as a no-op.") | ||||||
self.reference_data: List[Any] = [] | ||||||
elif isinstance(reference_data, (types.GeneratorType, Iterable)) and not isinstance(reference_data, str): | ||||||
if isinstance(reference_data, Sequence) and len(reference_data) < self.grid_size[0] * self.grid_size[1] - 1: | ||||||
msg = "Not enough reference data to fill the mosaic grid." | ||||||
raise ValueError(msg) | ||||||
self.reference_data = reference_data # type: ignore[assignment] | ||||||
else: | ||||||
msg = "reference_data must be a list, tuple, generator, or None." | ||||||
raise TypeError(msg) | ||||||
|
||||||
self.preprocessing_mode = preprocessing_mode | ||||||
|
||||||
def apply( | ||||||
self, | ||||||
img: np.ndarray, | ||||||
mix_data: List[ReferenceImage], | ||||||
tiles: np.ndarray, | ||||||
preprocessing_pipeline: Compose, | ||||||
**params: Any, | ||||||
) -> np.ndarray: | ||||||
transformed_img = preprocessing_pipeline(image=img)["image"] | ||||||
return self.apply_to_image_or_mask(transformed_img, "image", mix_data, tiles) | ||||||
|
||||||
def apply_to_mask(self, mask: np.ndarray, mix_data: List[ReferenceImage], *args: Any, **params: Any) -> np.ndarray: | ||||||
msg = "Mosaic does not support keypoints yet" | ||||||
raise NotImplementedError(msg) | ||||||
|
||||||
def apply_to_bbox(self, bbox: Any, *args: Any, **params: Any) -> Any: | ||||||
msg = "Mosaic does not support bbox yet" | ||||||
raise NotImplementedError(msg) | ||||||
|
||||||
def apply_to_global_label( | ||||||
self, label: np.ndarray, mix_data: List[ReferenceImage], *args: Any, **params: Any | ||||||
) -> np.ndarray: | ||||||
msg = "Mosaic does not support global label yet" | ||||||
raise NotImplementedError(msg) | ||||||
|
||||||
def sample_reference_data(self) -> List[Dict[str, Any]]: | ||||||
total_tiles = self.grid_size[0] * self.grid_size[1] | ||||||
sampled_reference_data: List[Any] = [] | ||||||
|
||||||
if isinstance(self.reference_data, Sequence) and len(self.reference_data): | ||||||
# Select data without replacement if there are more items than needed, else with replacement | ||||||
return choice(self.reference_data, self.grid_size[0] * self.grid_size[1] - 1, replace=False) | ||||||
|
||||||
if isinstance(self.reference_data, Iterator): | ||||||
# Get the necessary number of elements from the iterator | ||||||
sampled_reference_data = [] | ||||||
|
||||||
try: | ||||||
for _ in range(total_tiles - 1): | ||||||
next_element = next(self.reference_data, None) | ||||||
if next_element is None: | ||||||
# The iterator doesn't have enough elements | ||||||
warn("Reference data iterator has insufficient data to fill the mosaic grid.", RuntimeWarning) | ||||||
# Reset mix_data as we can't fulfill the required grid tiles | ||||||
return [] | ||||||
sampled_reference_data.append(next_element) | ||||||
except StopIteration: | ||||||
# This block is in case the iterator was shorter than expected and ran out before total_tiles | ||||||
warn("Reference data iterator was exhausted before filling all tiles.", RuntimeWarning) | ||||||
# Reset mix_data as we can't fulfill the required grid tiles | ||||||
sampled_reference_data = [] | ||||||
|
||||||
return sampled_reference_data | ||||||
|
||||||
def get_params(self) -> Dict[str, Any]: | ||||||
sampled_reference_data = self.sample_reference_data() | ||||||
|
||||||
tiles = split_uniform_grid(self.target_size, self.grid_size) | ||||||
|
||||||
shuffle(tiles) | ||||||
|
||||||
mix_data = [] | ||||||
|
||||||
for idx, tile in enumerate(tiles[:-1]): # last position in shuffled tiles is for target | ||||||
element = sampled_reference_data[idx] | ||||||
processed_element = self.read_fn(element) | ||||||
# Extract the tile dimensions | ||||||
tile_height, tile_width = tile[2] - tile[0], tile[3] - tile[1] | ||||||
# Preprocess the element based on the tile size | ||||||
processed_element = self.preprocess_element(processed_element, tile_height, tile_width) | ||||||
mix_data.append(processed_element) | ||||||
|
||||||
last_tile = tiles[-1] | ||||||
last_tile_width = last_tile[3] - last_tile[1] | ||||||
last_tile_height = last_tile[2] - last_tile[0] | ||||||
preprocessing_pipeline = self.get_preprocessing_pipeline(last_tile_height, last_tile_width) | ||||||
|
||||||
return {"mix_data": sampled_reference_data, "preprocessing_pipepline": preprocessing_pipeline} | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. issue (typo): There's a typo in the dictionary key
Suggested change
|
||||||
|
||||||
def get_preprocessing_pipeline(self, tile_height: int, tile_width: int) -> Compose: | ||||||
if self.preprocessing_mode == "resize": | ||||||
return Compose([Resize(height=tile_height, width=tile_width)]) | ||||||
if self.preprocessing_mode == "longest_max_size_pad": | ||||||
return Compose( | ||||||
[ | ||||||
LongestMaxSize(max_size=max(tile_height, tile_width)), | ||||||
PadIfNeeded(min_height=tile_height, min_width=tile_width), | ||||||
] | ||||||
) | ||||||
if self.preprocessing_mode == "smallest_max_size_crop": | ||||||
return Compose( | ||||||
[ | ||||||
SmallestMaxSize(max_size=min(tile_height, tile_width)), | ||||||
RandomSizedBBoxSafeCrop(height=tile_height, width=tile_width), | ||||||
] | ||||||
) | ||||||
if self.preprocessing_mode == "random_sized_bbox_safe_crop": | ||||||
return Compose( | ||||||
[ | ||||||
RandomSizedBBoxSafeCrop(height=tile_height, width=tile_width), | ||||||
LongestMaxSize(max_size=max(tile_height, tile_width)), | ||||||
PadIfNeeded(min_height=tile_height, min_width=tile_width), | ||||||
] | ||||||
) | ||||||
|
||||||
raise ValueError(f"Unknown preprocessing_mode {self.preprocessing_mode}") | ||||||
|
||||||
def preprocess_element(self, element: ReferenceImage, tile_height: int, tile_width: int) -> ReferenceImage: | ||||||
preprocessing_pipeline = self.get_preprocessing_pipeline(tile_height, tile_width) | ||||||
|
||||||
# Apply the preprocess pipeline to the image, mask, and other elements | ||||||
return cast(ReferenceImage, preprocessing_pipeline(**element)) | ||||||
|
||||||
def apply_to_image_or_mask( | ||||||
self, data: np.ndarray, data_key: Literal["image", "mask"], mix_data: List[ReferenceImage], tiles: np.ndarray | ||||||
) -> np.ndarray: | ||||||
"""Apply transformations to an image or mask based on mixed data and tile positioning. | ||||||
|
||||||
Args: | ||||||
data (np.ndarray): The original image or mask data. | ||||||
data_key (str): The key in the processed elements dictionary that corresponds to the data | ||||||
('image' or 'mask'). | ||||||
mix_data (List[ReferenceImage]): List of processed elements (dictionaries containing 'image', 'mask'). | ||||||
tiles (List[Tuple[int, int, int, int]]): List of tile coordinates. | ||||||
|
||||||
Returns: | ||||||
np.ndarray: The new image or mask after applying the mosaic transformations. | ||||||
""" | ||||||
new_data = np.empty(self.target_size) | ||||||
for element, tile in zip(mix_data, tiles): | ||||||
if data_key in element: | ||||||
y_min, x_min, y_max, x_max = tile | ||||||
element_data = element[data_key] | ||||||
new_data[y_min:y_max, x_min:x_max] = element_data | ||||||
|
||||||
last_tile = tiles[-1] | ||||||
y_min, x_min, y_max, x_max = last_tile | ||||||
|
||||||
new_data[y_min:y_max, x_min:x_max] = data | ||||||
|
||||||
return new_data | ||||||
|
||||||
def get_transform_init_args_names(self) -> Tuple[str, ...]: | ||||||
return "reference_data", "grid_size", "split_mode", "preprocessing_mode" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
issue (typo): The
apply_to_mask
method in theMosaic
class raisesNotImplementedError
with a message indicating that Mosaic does not support keypoints, which seems to be a copy-paste error from theapply_to_keypoints
method. The error message should correctly reflect that it's the mask processing that is not implemented.