From 4e18fda54aa0cb811969a24e33239cc44292d819 Mon Sep 17 00:00:00 2001
From: Vishisht Priyadarshi
Date: Tue, 14 Dec 2021 08:34:04 +0000
Subject: [PATCH] Improve package

---
 .gitignore                     |  2 +-
 cobraclassifier/cobra_boost.py | 96 ++++++++++++++++++++++++++++++++++
 setup.py                       |  6 +--
 3 files changed, 100 insertions(+), 4 deletions(-)
 create mode 100644 cobraclassifier/cobra_boost.py

diff --git a/.gitignore b/.gitignore
index 4477699..a1f5910 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
 .venv/
-__pycache__/
+**/__pycache__/
 .vscode
 build/
 dist/
\ No newline at end of file
diff --git a/cobraclassifier/cobra_boost.py b/cobraclassifier/cobra_boost.py
new file mode 100644
index 0000000..9abd816
--- /dev/null
+++ b/cobraclassifier/cobra_boost.py
@@ -0,0 +1,96 @@
+import numpy as np
+from math import log
+from itertools import compress
+
+from cobraclassifier import classifier_cobra as cobra
+from cobraclassifier import edited_knn, near_miss_v1, near_miss_v2, near_miss_v3, tomek_link, condensed_knn, knn_und
+
+
+# Error ratio used when the weighted loss is exactly 0; its reciprocal is used
+# when the loss reaches 1. Keeps the logarithms below finite.
+MIN_ALPHA = 0.0000001
+
+
+class CobraBoost:
+    """Boosted COBRA classifier trained on an undersampled view of the data.
+
+    X and y are indexed with NumPy-style indices, so both are expected to be
+    NumPy arrays. Labels are assumed to be 0/1 (the majority label is 1 when
+    sum(y) exceeds half of len(y)) -- confirm against callers. Only the model
+    and the error ratio of the last boosting round are kept.
+    """
+
+    def __init__(self, X, y, machines, undersampling_method):
+        """Store the training data and start from uniform sample weights.
+
+        machines is forwarded to the COBRA model; undersampling_method must
+        provide undersample(X, y, majority_class_label).
+        """
+        self.X = X
+        self.y = y
+
+        self.model = cobra(machines = machines)
+        self.majority_class_label = int(sum(y) > 0.5 * len(y))
+        self.undersampling_method = undersampling_method
+
+        # Error ratio of the last boosting round; 0 means "not trained yet".
+        self.weight_update = 0
+        self.init_w = 1.0 / len(self.X)
+        self.weight = np.full(len(self.X), self.init_w)
+
+
+    def learn_parameters(self, iterations):
+        """Run `iterations` boosting rounds and update the sample weights.
+
+        Each round refits the model on the undersampled data, measures the
+        weighted error on the full training set and re-weights the samples.
+        """
+        verdict = self.undersampling_method.undersample(self.X, self.y, self.majority_class_label)
+        X_undersampled, y_undersampled = self.X[verdict, :], self.y[verdict]
+
+        for t in range(iterations):
+            print("[Testing]: Executing the iteration - {} of CobraBoost".format(t + 1))
+
+            self.model.fit(X_undersampled, y_undersampled, sample_weight = self.weight[verdict])
+
+            # Predict once per round; the result feeds both the loss and the update.
+            misclassified = np.asarray(self.y != self.model.predict(self.X), dtype = bool)
+            loss = float(self.weight[misclassified].sum())
+
+            # A loss of exactly 0 or 1 would give log(0) or a division by zero.
+            if loss <= 0:
+                alpha = MIN_ALPHA
+            elif loss >= 1:
+                alpha = 1 / MIN_ALPHA
+            else:
+                alpha = loss / (1 - loss)
+
+            # Equals 0.5 * (log(1 - loss) - log(loss)) whenever 0 < loss < 1.
+            alpha_hat = -0.5 * log(alpha)
+
+            # Raise the weight of misclassified samples and lower the others.
+            # For -1/+1 labels this matches exp(-alpha_hat * y * prediction),
+            # and unlike that product it is also correct for 0/1 labels.
+            self.weight = self.weight * np.exp(np.where(misclassified, alpha_hat, -alpha_hat))
+            self.weight = self.weight / self.weight.sum()
+
+            self.weight_update = alpha
+
+
+    def predict(self, test_data):
+        """Return an array with one 0/1 label per row of test_data."""
+        test_data = np.asarray(test_data)
+        if len(test_data) == 0:
+            return np.zeros(0)
+
+        if self.weight_update <= 0:
+            raise RuntimeError("CobraBoost.predict called before learn_parameters")
+
+        # The single fitted model casts one vote of this strength; the vote is
+        # negative when the last round's error ratio exceeded 1.
+        vote = log(1 / self.weight_update)
+        votes_positive = np.asarray(self.model.predict(test_data)).reshape(-1) == 1
+
+        positive_score = np.where(votes_positive, vote, 0.0)
+        negative_score = np.where(votes_positive, 0.0, vote)
+
+        return (negative_score <= positive_score).astype(float)
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 2a04957..99d0a2d 100644
--- a/setup.py
+++ b/setup.py
@@ -2,13 +2,13 @@
 setup(
   name = 'cobraclassifier',
   packages = ['cobraclassifier'],
-  version = '1.3',
-  license='MIT',
+  version = '1.4',
+  license = 'MIT',
   description = 'COBRA for classification tasks (on Imbalanced Data)',
   author = ['Dr. Arabin Kumar Dey', 'Vishisht Priyadarshi', 'Aadi Gupta', 'Tejus Singla', 'Shashank Goyal'],
   author_email = 'vishishtpriyadarshi867@gmail.com',
   url = 'https://github.com/vishishtpriyadarshi/MA691-COBRA-6',
-  download_url = 'https://github.com/vishishtpriyadarshi/MA691-COBRA-6/archive/refs/tags/v1.1.tar.gz',
+  download_url = 'https://github.com/vishishtpriyadarshi/MA691-COBRA-6/archive/refs/tags/v1.4.tar.gz',
   keywords = ['Classification', 'Imbalanced Data', 'Machine Learning'],
   install_requires=[
           'numpy',