Merge branch 'dev' into resnet

JanPalasek · Apr 27, 2020 · e7db370 · e7db370
2 parents 3147a21 + 7783d54
commit e7db370
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 33 deletions.
diff --git a/README.md b/README.md
@@ -9,33 +9,42 @@ Note that it takes **grayscale images** on the input. RGB images therefore have
 
 ## How to use pretrained model in your project
 ### Prerequisites
-*numpy* and *tensorflow* package to your virtual machine. Convertion to grayscale can be handled for example by *Pillow* and *scikit-image* packages.
+Install the *numpy* and *tensorflow* packages on your virtual machine.
 
 ### Steps
 1. Go to latest release and download binary files
 2. Load model in your project using ```model = tf.keras.models.load_model(PATH_TO_MODEL_DIR)```
   - PATH_TO_MODEL_DIR is path to directory containing the neural network pretrained model
   - it can be found inside the release binary files
-3. Copy StringEncoder class from to your project (useful for decoding)
-4. Convert rgb image to grayscale using e.g. this code:
+3. Normalize the image to the 0..1 interval. If it is already normalized, skip this step.
 ```python
-from skimage.color import rgb2gray
-# img is rgb image, input after this conversion will have (70, 175) shape
-input: np.ndarray = rgb2gray(img)
- ```
+img = (img / 255).astype(np.float32)
+```
 4. Predict using the following code
 ```python
-# input has nowof  shape (70, 175)
+# convert to grayscale
+r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
+input = 0.299 * r + 0.587 * g + 0.114 * b
+
+# input now has shape (70, 175)
 # we modify dimensions to match model's input
 input = np.expand_dims(input, 0)
 input = np.expand_dims(input, -1)
+
 # input is now of shape (batch_size, 70, 175, 1)
 # output will have shape (batch_size, 4, 26)
-output = model(input).numpy()
+output = model.predict(input)
 # now get labels
 labels_indices = np.argmax(output, axis=2)
-decoder = StringEncoder("abcdefghijklmnopqrstuvwxyz")
-labels = [decoder.decode(x) for x in labels_indices]
-# labels if list of strings, where each string represents detected captcha codel etters
+
+available_chars = "abcdefghijklmnopqrstuvwxyz"
+def decode(li):
+    result = []
+    for char in li:
+        result.append(available_chars[char])
+    return "".join(result)
+
+# labels will contain the decoded captcha strings, one per input image
+labels = [decode(x) for x in labels_indices]
 ```
 - *tf* is alias for tensorflow package, *np* for numpy
diff --git a/src/captcha_detection/captcha_network.py b/src/captcha_detection/captcha_network.py
@@ -145,26 +145,11 @@ def train(self, train_x, train_y, val_x, val_y, args):
     def save_model(self, out_path):
         tf.saved_model.save(self._model, out_path)
 
-    def predict(self, inputs):
+    def predict(self, inputs, args):
         inputs = self._image_preprocess_pipeline(inputs)
 
-        return self._predict(inputs).numpy()
-
-    @tf.function
-    def _predict(self, inputs):
-        y_pred = self._predict_proba(inputs)
-
+        y_pred = self._model.predict(inputs, args.batch_size)
         if len(y_pred.shape) <= 2:
-            y_pred = tf.expand_dims(y_pred, axis=1)
-        y_pred = tf.argmax(y_pred, axis=2)
-
-        return y_pred
-
-    def predict_proba(self, inputs):
-        inputs = self._image_preprocess_pipeline(inputs)
-
-        return self._predict_proba(inputs).numpy()
-
-    @tf.function
-    def _predict_proba(self, inputs):
-        return self._model(inputs)
+            y_pred = np.expand_dims(y_pred, axis=1)
+        y_pred = np.argmax(y_pred, axis=2)
+        return y_pred
diff --git a/src/test.py b/src/test.py
@@ -87,7 +87,7 @@
 
     labels = label_preprocess_pipeline(labels)
 
-    pred_labels = network.predict(inputs)
+    pred_labels = network.predict(inputs, args)
 
     correct = labels == pred_labels