import numpy as np
import tensorflow as tf
# Implemented transformations:
# 'horFlip': Horizontal image flip
# 'increaseContrast<x>': Increase image contrast by 0.1*x (e.g. 'increaseContrast3': increase contrast by 0.3)
# 'BW': Convert image to gray-scale
# 'gamma<x>': Gamma transformation. Each pixel value is raised to the power of 0.1*x, after being normalized to the range [0,1]
# 'blur<x>': Horizontally blur the image with a kernel of x pixels
# 'zoomin<x>': Zoom in to the central 0.01*x portion of the image, then resize (using bilinear interpolation) back to the original image size
# 'crop<x>_<y>': Crop a 0.01*x portion of the image, offset 0.01*y right and down from the upper left corner, then resize (using bilinear interpolation) back to the original image size
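#
# A few illustrative strings showing the parameter syntax (parsed by ParseParameters at the bottom of this file):
#   'gamma8'         - deterministic: exponent 0.8
#   'gamma5*15'      - stochastic: the parameter is drawn uniformly in [5,15], i.e. an exponent in [0.5,1.5]
#   'crop80_10'      - two parameters, separated by '_': an 80% crop, offset 10% from the upper left corner
#   ['BW', 'blur3']  - a chained transformation: gray-scale conversion followed by a 3-pixel horizontal blur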
class Transformer:
    def __init__(self, transformations, batch_operation=True, max_pixel_value=255):
        # Inputs:
        #   transformations: List whose elements are either strings or lists of strings (out of the transformations listed
        #       above), defining the transformations to be applied to all images. Each string may optionally carry a
        #       parameter, in the form <transformation_name><parameter> (e.g. 'gamma8.5'). When an element is a list of
        #       strings, these transformations are chained, one after the other, on each image. Parameters can also be
        #       stochastic: pass <min_parameter>*<max_parameter> instead of <parameter>, and the parameter will be drawn
        #       for each image from a uniform distribution.
        #   batch_operation: Whether the transformer operates on a single image tensor (HxWxC) or on a batch of images tensor (NxHxWxC, default)
        #   max_pixel_value: Pixels' dynamic range is expected to be [0, max_pixel_value]
        # Output:
        #   A Transformer object
self.batch_operation = batch_operation
self.max_pixel_value = max_pixel_value
self.transformations = [t if isinstance(t,list) else [t] for t in transformations]
self.num_transformations = len(self.transformations)
self.per_image_copies = len(self.transformations) + 1
self.transformation_param = [[[] for i_sub in range(len(self.transformations[i]))] for i in range(self.num_transformations)]
        self.random_transformation = [np.zeros([len(self.transformations[i])]).astype(bool) for i in range(self.num_transformations)]  # plain bool: np.bool was removed from recent NumPy versions
for ind, cur_transformation in enumerate(self.transformations):
            for sub_ind, sub_transformation in enumerate(cur_transformation):
self.transformations[ind][sub_ind], self.transformation_param[ind][sub_ind], self.random_transformation[ind][sub_ind] =\
ParseParameters(sub_transformation)
    def TransformationParameter(self, ind, shape=None):
        # Returns the parameter list of sub-transformation ind=(transformation index, index within the chain).
        # For stochastic parameters, a uniform random tensor is drawn (of the given shape, or a scalar if shape is None).
if self.random_transformation[ind[0]][ind[1]]:
if shape is None:
return [tf.random_uniform([], minval=par[0],maxval=par[1]) for par in self.transformation_param[ind[0]][ind[1]]]
else:
return [tf.random_uniform(shape=shape, minval=par[0],maxval=par[1]) for par in self.transformation_param[ind[0]][ind[1]]]
else:
return [par for par in self.transformation_param[ind[0]][ind[1]]]
def TransformImages_TF_OP(self,images,labels):
        # Creates the TensorFlow op producing the transformed images and labels.
# Inputs:
# images: A single image (HxWxC) or a batch of images (NxHxWxC) tensor (depending on batch_operation)
# labels: A 1-D tensor of corresponding image labels
# Outputs:
# output_images: A batch of images and their transformed versions. Each image is followed by its transformed versions, then by the next image (if batch_operation).
# output_labels: A 1-D batch of corresponding labels
assert (len(images.get_shape())==3 and not self.batch_operation) or (len(images.get_shape())==4 and self.batch_operation),'Incorrect shape of images input'
if not self.batch_operation:
images = tf.expand_dims(images,axis=0)
image_shape = np.array(images.get_shape().as_list()[1:3])
non_modified_images = tf.cast(images,tf.float32)
if any([any([('Contrast' in T) for T in Ts]) for Ts in self.transformations]):
            # Compute the mean in float32, so it can be combined with the float32 images in the contrast branch below
            image_mean = tf.reduce_mean(non_modified_images, axis=(1, 2), keep_dims=True)
images2use = tf.expand_dims(images,axis=1)
for ind,cur_chained_transformation in enumerate(self.transformations):
modified_image = 1.*non_modified_images
for sub_ind,cur_transformation in enumerate(cur_chained_transformation):
if 'increaseContrast' in cur_transformation:
modified_image = tf.maximum(0.0,tf.minimum((modified_image-image_mean)*(1+0.1*self.TransformationParameter((ind,sub_ind))[0])+image_mean,self.max_pixel_value))
elif 'horFlip' in cur_transformation:
                    # tf.image.flip_left_right operates on a single image, so map it over the batch:
                    modified_image = tf.map_fn(lambda image: tf.image.flip_left_right(image), modified_image)
elif 'blur' in cur_transformation:
                    # The blur parameter must be deterministic, since the kernel size sets the (static) number of shifted copies summed below
                    blur_pixels = int(self.TransformationParameter((ind, sub_ind))[0])
                    assert blur_pixels >= 2, 'Blurring the image with a blur kernel of size %d makes no difference' % (blur_pixels)
                    # Pad a total of blur_pixels-1 columns (split as evenly as possible), so that each of the blur_pixels shifted slices below is full-width
                    pre_blur_images = tf.pad(modified_image, paddings=((0, 0), (0, 0), ((blur_pixels - 1) // 2, blur_pixels - 1 - (blur_pixels - 1) // 2), (0, 0)), mode='SYMMETRIC')
                    modified_image = tf.zeros_like(modified_image)
                    for pixel_num in range(blur_pixels):
                        # Average blur_pixels horizontally-shifted copies. The slice runs along the width axis (2), so its size is taken from tf.shape(modified_image)[2]
                        modified_image = tf.add(modified_image, tf.slice(pre_blur_images / blur_pixels, begin=[0, 0, pixel_num, 0], size=[-1, -1, tf.shape(modified_image)[2], -1]))
elif 'BW' in cur_transformation:
modified_image = tf.tile(tf.reduce_sum(tf.multiply(modified_image,tf.reshape(tf.constant([0.299,0.587,0.114]),[1,1,1,3])),axis=3,keep_dims=True),multiples=[1,1,1,3])
elif 'gamma' in cur_transformation:
modified_image = tf.clip_by_value(modified_image,clip_value_min=0,clip_value_max=self.max_pixel_value)
modified_image = tf.pow(modified_image/self.max_pixel_value,0.1*self.TransformationParameter((ind,sub_ind))[0])*self.max_pixel_value
                elif 'zoomin' in cur_transformation:
                    # Center crop box in normalized [y1,x1,y2,x2] coordinates: 0.5-/+0.005*x on both axes, i.e. the central 0.01*x portion
if self.random_transformation[ind][sub_ind]:
crop_params = self.TransformationParameter((ind, sub_ind), shape=tf.reshape(tf.shape(images)[0], [1]))
boxes = np.reshape([-1,-1,1,1],[1,4])*0.005*tf.reshape(tf.cast(crop_params[0],dtype=tf.float32),[-1,1])+0.5*np.ones([1,4])
else:
crop_params = self.TransformationParameter((ind, sub_ind))
boxes = np.array([-1,-1,1,1])*0.005*crop_params[0]*tf.reshape(tf.ones(shape=tf.reshape(tf.shape(images)[0],[1])),[-1,1])+0.5*np.ones([1,4])
box_ind = tf.cast(tf.cumsum(tf.ones(shape=tf.reshape(tf.shape(images)[0],[1])),axis=0)-1,dtype=tf.int32)
crop_size = tf.constant(image_shape,dtype=tf.int32)
modified_image = tf.image.crop_and_resize(image=modified_image,boxes=boxes,box_ind=box_ind,crop_size=crop_size)
                elif 'crop' in cur_transformation:
                    # Crop box in normalized [y1,x1,y2,x2] coordinates: upper-left corner offset by 0.01*y, extending 0.01*x right and down
if self.random_transformation[ind][sub_ind]:
crop_params = self.TransformationParameter((ind, sub_ind), shape=tf.reshape(tf.shape(images)[0], [1]))
                        # Clamp the offset so that the crop box does not exceed the image boundary
                        crop_params[1] = tf.minimum(crop_params[1], 100 - crop_params[0])
boxes = 0.01*np.ones([1,4])*tf.reshape(tf.cast(crop_params[1],dtype=tf.float32),[-1,1])+0.01*np.reshape([0,0,1,1],[1,4])*tf.reshape(tf.cast(crop_params[0],dtype=tf.float32),[-1,1])
else:
crop_params = self.TransformationParameter((ind, sub_ind))
boxes = 0.01*(np.ones([1,4])*crop_params[1]+np.reshape([0,0,1,1],[1,4])*crop_params[0])*tf.reshape(tf.ones(shape=tf.reshape(tf.shape(images)[0],[1])),[-1,1])
box_ind = tf.cast(tf.cumsum(tf.ones(shape=tf.reshape(tf.shape(images)[0], [1])), axis=0) - 1,dtype=tf.int32)
crop_size = tf.constant(image_shape, dtype=tf.int32)
modified_image = tf.image.crop_and_resize(image=modified_image, boxes=boxes, box_ind=box_ind,crop_size=crop_size)
else:
raise Exception('Transformation %s not implemented'%(cur_transformation))
images2use = tf.concat((images2use,tf.expand_dims(tf.cast(modified_image,images.dtype),axis=1)),axis=1)
output_images = tf.reshape(images2use,[-1]+images.get_shape().as_list()[1:])
output_labels = tf.reshape(tf.tile(tf.expand_dims(labels, axis=1), multiples=[1, self.per_image_copies]),[-1])
return output_images,output_labels
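        # Illustrative ordering (for transformations [T1, T2] and an input batch [img0, img1]):
        # output_images = [img0, T1(img0), T2(img0), img1, T1(img1), T2(img1)], and
        # output_labels repeats each label per_image_copies (=3) times to match.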
def Process_NonLogits_TF_OP(self,input_tensor):
        # The outputs of a classifier fed with transformed images will often correspond to its input batch size, which was enlarged by the transformer.
        # This function simply discards the outputs that correspond to the transformed images, and outputs only those corresponding to the original images.
        # To be used on all output tensors (e.g. a "correct classification" tensor) except the logits tensor.
        # Input:
        #   A tensor whose first dimension corresponds to the classifier's input batch size
        # Output: (tf.stop_gradient is applied to the output, to avoid unnecessary gradient calculation when training a detector)
        #   The input tensor after filtering out values corresponding to the transformed images (along the first dimension)
input_tensor_shape = input_tensor.get_shape().as_list()
if len(input_tensor_shape)>1:
input_tensor = tf.reshape(input_tensor,[-1,self.per_image_copies]+input_tensor_shape[1:])
tensor2return = tf.reshape(tf.slice(input_tensor,begin=[0,0]+list(np.zeros([len(input_tensor_shape)-1]).astype(np.int32)),size=[-1,1]+list(-1*np.ones([len(input_tensor_shape)-1]).astype(np.int32))),
[-1]+input_tensor_shape[1:])
else:
            input_tensor = tf.reshape(input_tensor, [-1, self.per_image_copies])
            tensor2return = tf.reshape(tf.slice(input_tensor, begin=[0, 0], size=[-1, 1]), [-1])
return tf.stop_gradient(tensor2return)
def Process_Logits_TF_OP(self,input_logits,reorder_logits=True,num_logits_per_transformation=-1,avoid_gradients_calc=True):
        # Converts the logits output of a classifier fed with transformed images into a logits vector corresponding to the original image, and a features vector.
        # Inputs:
        #   input_logits: The logits output of the classifier of interest, in the shape of NxNUM_CLASSES,
        #       where N is the original batch size x (number of transformations+1)
        #   reorder_logits: If True (default), the features vector holds the logits corresponding to all transformations (including the original,
        #       non-transformed image), ordered according to a descending order of the logits corresponding to the original image.
        #   num_logits_per_transformation: (optional) Use only the logits corresponding to the top num_logits_per_transformation logits of the original image.
        #       To use this option, pass an integer between 1 and NUM_CLASSES-1.
        #   avoid_gradients_calc: If True (default), tf.stop_gradient is applied to both outputs, to avoid unnecessary gradient calculation when training a detector.
        # Outputs:
        #   logits_of_original: Logits tensor corresponding to the original, non-transformed, images. The logits corresponding to the transformed versions are removed.
        #   features_vect: Feature vectors tensor of shape N x (number of transformations+1) x min(NUM_CLASSES,num_logits_per_transformation)
input_logits_shape = input_logits.get_shape().as_list()
assert len(input_logits_shape)==2,'Unrecognized logits shape'
assert not (num_logits_per_transformation>0 and not reorder_logits),'Cannot keep k logits per transformation without reordering them'
assert not num_logits_per_transformation>input_logits_shape[1],'Cannot keep more logits (%d) than there are originally (%d)'%(num_logits_per_transformation,input_logits_shape[1])
input_logits = tf.reshape(input_logits,[-1,self.per_image_copies,input_logits_shape[1]])
logits_of_original = tf.reshape(tf.slice(input_logits,begin=[0,0,0],size=[-1,1,-1]),[-1,input_logits_shape[-1]])
if reorder_logits:
org_logits_shape = logits_of_original.get_shape().as_list()
            # k must equal the number of classes, for the reshape of descending_order below to be valid
            _, descending_order = tf.nn.top_k(logits_of_original, k=org_logits_shape[1])
descending_order = tf.tile(tf.reshape(descending_order,shape=[-1,1,org_logits_shape[1],1]),[1,self.per_image_copies,1,1])
image_indices = tf.tile(tf.reshape(tf.range(org_logits_shape[0]),[org_logits_shape[0],1,1,1]),multiples=[1,self.per_image_copies,org_logits_shape[1],1])
permutation_indices = tf.tile(tf.reshape(tf.range(self.per_image_copies),[1,self.per_image_copies,1,1]),[org_logits_shape[0],1,org_logits_shape[1],1])
combined_indices = tf.concat([image_indices,permutation_indices,descending_order],axis=3)
descending_values = tf.gather_nd(params=input_logits,indices=combined_indices)
if num_logits_per_transformation>0:
descending_values = tf.slice(descending_values,begin=[0,0,0],size=[-1,-1,num_logits_per_transformation])
features_vect = tf.reshape(descending_values,[int(input_logits_shape[0]/self.per_image_copies),-1])
else:
features_vect = tf.reshape(input_logits,[-1, self.per_image_copies * input_logits_shape[-1]])
if avoid_gradients_calc:
return tf.stop_gradient(logits_of_original),tf.stop_gradient(features_vect)
else:
return logits_of_original,features_vect
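        # Illustrative example of the reordering (assumed values): for NUM_CLASSES=3 and original-image
        # logits [2., 5., 1.], descending_order is [1, 0, 2]; the logits of the original image and of each
        # transformed copy are all gathered in that same class order before being flattened into features_vect,
        # and num_logits_per_transformation=2 would then keep only the first 2 entries per copy.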
def ParseParameters(cur_transformation):
    # Splits a transformation string (e.g. 'crop80_10' or 'gamma5*15') into its name, a list of its numeric,
    # underscore-separated parameters, and a flag indicating whether any parameter is stochastic (a '*'-separated range).
    is_digit = [character.isdigit() for character in cur_transformation]
    is_asterisk = [character == '*' for character in cur_transformation]
    transformation_name, transformation_param, random_transformation = cur_transformation, None, False
if np.any(np.logical_or(is_asterisk,is_digit)):
transformation_param = []
params_first_ind = np.argwhere(np.logical_or(is_asterisk,is_digit))[0][0]
params = transformation_name[params_first_ind:].split('_')
transformation_name = cur_transformation[:params_first_ind]
for param in params:
if '*' in param:
random_transformation = True
transformation_param.append([float(param[:param.find('*')]),float(param[param.find('*')+1:])])
else:
transformation_param.append(float(param))
return transformation_name,transformation_param,random_transformation
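
# A minimal usage sketch (illustrative; not part of the original module). It assumes a TF1.x
# environment, matching the APIs used above (tf.placeholder, tf.random_uniform). The 32x32x3
# image size and the particular transformation strings are arbitrary choices for illustration.
if __name__ == '__main__':
    images_ph = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
    labels_ph = tf.placeholder(tf.int32, shape=[None])
    # 'horFlip' is deterministic, 'gamma5*15' draws its parameter uniformly in [5,15] (an exponent
    # in [0.5,1.5]), and ['BW', 'increaseContrast3'] chains gray-scale conversion with a 0.3 contrast increase
    transformer = Transformer(transformations=['horFlip', 'gamma5*15', ['BW', 'increaseContrast3']])
    transformed_images, transformed_labels = transformer.TransformImages_TF_OP(images_ph, labels_ph)
    # Each input image yields per_image_copies (=4 here) output rows: the original followed by its 3 transformed versions
    print(transformed_images.get_shape())  # (?, 32, 32, 3), with the batch dimension 4x the input's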