diff --git a/README.md b/README.md index 3f93666..7d049b5 100644 --- a/README.md +++ b/README.md @@ -9,33 +9,42 @@ Note that it takes **grayscale images** on the input. RGB images therefore have ## How to use pretrained model in your project ### Prerequisities -*numpy* and *tensorflow* package to your virtual machine. Convertion to grayscale can be handled for example by *Pillow* and *scikit-image* packages. +Install the *numpy* and *tensorflow* packages on your virtual machine. ### Steps 1. Go to latest release and download binary files 2. Load model in your project using ```model = tf.keras.models.load_model(PATH_TO_MODEL_DIR)``` - PATH_TO_MODEL_DIR is path to directory containing the neural network pretrained model - it can be found inside the release binary files -3. Copy StringEncoder class from to your project (useful for decoding) -4. Convert rgb image to grayscale using e.g. this code: +3. Normalize the image to the 0..1 interval. If it is already normalized, skip this step. ```python -from skimage.color import rgb2gray -# img is rgb image, input after this conversion will have (70, 175) shape -input: np.ndarray = rgb2gray(img) - ``` +img = (img / 255).astype(np.float32) +``` 4. 
Predict using following code ```python -# input has nowof shape (70, 175) +# convert to grayscale +r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2] +input = 0.299 * r + 0.587 * g + 0.114 * b + +# input now has shape (70, 175) # we modify dimensions to match model's input input = np.expand_dims(input, 0) input = np.expand_dims(input, -1) + # input is now of shape (batch_size, 70, 175, 1) # output will have shape (batch_size, 4, 26) -output = model(input).numpy() +output = model.predict(input) # now get labels labels_indices = np.argmax(output, axis=2) -decoder = StringEncoder("abcdefghijklmnopqrstuvwxyz") -labels = [decoder.decode(x) for x in labels_indices] -# labels if list of strings, where each string represents detected captcha codel etters + +available_chars = "abcdefghijklmnopqrstuvwxyz" +def decode(li): + result = [] + for char in li: + result.append(available_chars[char]) + return "".join(result) + +# labels will contain the decoded captcha codes +labels = [decode(x) for x in labels_indices] ``` - *tf* is alias for tensorflow package, *np* for numpy diff --git a/src/captcha_detection/captcha_network.py b/src/captcha_detection/captcha_network.py index d856629..6aa3880 100644 --- a/src/captcha_detection/captcha_network.py +++ b/src/captcha_detection/captcha_network.py @@ -145,26 +145,11 @@ def train(self, train_x, train_y, val_x, val_y, args): def save_model(self, out_path): tf.saved_model.save(self._model, out_path) - def predict(self, inputs): + def predict(self, inputs, args): inputs = self._image_preprocess_pipeline(inputs) - return self._predict(inputs).numpy() - - @tf.function - def _predict(self, inputs): - y_pred = self._predict_proba(inputs) - + y_pred = self._model.predict(inputs, args.batch_size) if len(y_pred.shape) <= 2: - y_pred = tf.expand_dims(y_pred, axis=1) - y_pred = tf.argmax(y_pred, axis=2) - - return y_pred - - def predict_proba(self, inputs): - inputs = self._image_preprocess_pipeline(inputs) - - return 
self._predict_proba(inputs).numpy() - - @tf.function - def _predict_proba(self, inputs): - return self._model(inputs) \ No newline at end of file + y_pred = np.expand_dims(y_pred, axis=1) + y_pred = np.argmax(y_pred, axis=2) + return y_pred \ No newline at end of file diff --git a/src/test.py b/src/test.py index cfcbe5f..1a0b4e9 100644 --- a/src/test.py +++ b/src/test.py @@ -87,7 +87,7 @@ labels = label_preprocess_pipeline(labels) - pred_labels = network.predict(inputs) + pred_labels = network.predict(inputs, args) correct = labels == pred_labels