From 82620aeca23df5a454ac3f6e886e322be594ac42 Mon Sep 17 00:00:00 2001 From: Luiz Carlos Cavalcanti Date: Wed, 29 Mar 2017 23:17:50 -0300 Subject: [PATCH] Simplifying InvalidCnpjCpfClassifier implementation The InvalidCnpjCpfClassifier was making an unnecessary copy of the whole dataset and was also converting one of the dataframe columns for no apparent reason. This change should improve the classifier's running time as well as its memory footprint. Signed-off-by: Luiz Carlos Cavalcanti --- rosie/invalid_cnpj_cpf_classifier.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/rosie/invalid_cnpj_cpf_classifier.py b/rosie/invalid_cnpj_cpf_classifier.py index 26b1d140f..fa2b20b0e 100644 --- a/rosie/invalid_cnpj_cpf_classifier.py +++ b/rosie/invalid_cnpj_cpf_classifier.py @@ -14,9 +14,7 @@ def transform(self, X=None): return self def predict(self, X): - self._X = X.copy() - self._X['cnpj_cpf'] = self._X['cnpj_cpf'].astype(np.str) - return np.r_[self._X.apply(self.__is_invalid, axis=1)] + return np.r_[X.apply(self.__is_invalid, axis=1)] def __is_invalid(self, row): - return (row['document_type'] in [0, 1]) & (not cpfcnpj.validate(row['cnpj_cpf'])) + return (row['document_type'] in [0, 1]) & (not cpfcnpj.validate(str(row['cnpj_cpf'])))