Showing 16 changed files with 785 additions and 44 deletions.
@@ -0,0 +1,57 @@
"""Example of using mixture models for outlier thresholding.""" | ||
# Author: D Kulik | ||
# License: BSD 2 clause | ||
|
||
|
||
import os | ||
import sys | ||
|
||
from pyod.models.knn import KNN | ||
from pyod.utils.data import evaluate_print, generate_data | ||
from pyod.utils.example import visualize | ||
|
||
from pythresh.thresholds.mixmod import MIXMOD | ||
|
||
# temporary solution for relative imports in case pyod is not installed | ||
# if pyod is installed, no need to use the following line | ||
sys.path.append( | ||
os.path.abspath(os.path.join(os.path.dirname('__file__'), '..'))) | ||
|
||
|
||
if __name__ == '__main__': | ||
contamination = 0.1 # percentage of outliers | ||
n_train = 200 # number of training points | ||
n_test = 100 # number of testing points | ||
|
||
# Generate sample data | ||
X_train, X_test, y_train, y_test =\ | ||
generate_data(n_train=n_train, | ||
n_test=n_test, | ||
n_features=2, | ||
contamination=contamination, | ||
random_state=42) | ||
|
||
# train Autoencoder detector | ||
clf_name = 'KNN' | ||
clf = KNN() | ||
clf.fit(X_train) | ||
thres = MIXMOD() | ||
|
||
# get the prediction labels and outlier scores of the training data | ||
y_train_scores = clf.decision_scores_ # raw outlier scores | ||
# binary labels (0: inliers, 1: outliers) | ||
y_train_pred = thres.eval(y_train_scores) | ||
|
||
# get the prediction on the test data | ||
y_test_scores = clf.decision_function(X_test) # outlier scores | ||
y_test_pred = thres.eval(y_test_scores) # outlier labels (0 or 1) | ||
|
||
# evaluate and print the results | ||
print('\nOn Training Data:') | ||
evaluate_print(clf_name, y_train, y_train_scores) | ||
print('\nOn Test Data:') | ||
evaluate_print(clf_name, y_test, y_test_scores) | ||
|
||
# visualize the results | ||
visualize(clf_name, X_train, X_test, y_train, y_test, y_train_pred, | ||
y_test_pred, show_figure=True, save_figure=False) |
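The example above thresholds the scores of a single detector. The new test file further below also column-stacks the scores of several PyOD detectors before passing them to the same MIXMOD object. A minimal sketch of that multi-detector usage, mirroring the test's setUp; it assumes eval accepts a 2-D array of stacked scores and that a thresh_ attribute is set after fitting, as the test asserts:

# Sketch only: multi-detector scores thresholded with MIXMOD, mirroring
# the setUp of the new test; `thresh_` is assumed from the test's asserts.
import numpy as np
from pyod.models.iforest import IForest
from pyod.models.knn import KNN
from pyod.models.pca import PCA
from pyod.utils.data import generate_data

from pythresh.thresholds.mixmod import MIXMOD

X_train, _, _, _ = generate_data(n_train=200, n_test=100,
                                 contamination=0.1, random_state=42)

# column-stack the raw outlier scores of three detectors
detectors = [KNN(), PCA(), IForest()]
scores = np.vstack([det.fit(X_train).decision_scores_
                    for det in detectors]).T

thres = MIXMOD()             # default settings, as in the example above
labels = thres.eval(scores)  # 0 = inlier, 1 = outlier
print('estimated threshold:', thres.thresh_)
print('flagged outlier fraction:', labels.mean())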
@@ -0,0 +1,91 @@
import sys
import unittest
from itertools import product
from os.path import dirname as up

# noinspection
import numpy as np
from numpy.testing import assert_equal
from pyod.models.iforest import IForest
from pyod.models.knn import KNN
from pyod.models.pca import PCA
from pyod.utils.data import generate_data

from pythresh.thresholds.mixmod import MIXMOD

# temporary solution for relative imports in case pythresh is not installed
# if pythresh is installed, no need to use the following lines
path = up(up(up(__file__)))
sys.path.append(path)


class TestMIXMOD(unittest.TestCase):

    def setUp(self):
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        self.X_train, self.X_test, self.y_train, self.y_test = generate_data(
            n_train=self.n_train, n_test=self.n_test,
            contamination=self.contamination, random_state=42)

        # scores from a single detector
        clf = KNN()
        clf.fit(self.X_train)
        scores = clf.decision_scores_

        # column-stacked scores from several detectors
        clfs = [KNN(), PCA(), IForest()]
        multiple_scores = [
            clf.fit(self.X_train).decision_scores_ for clf in clfs]
        multiple_scores = np.vstack(multiple_scores).T

        self.all_scores = [scores, multiple_scores]
        self.methods = ['mean', 'ks']
        self.tol = [1e-3, 1e-5, 1e-8, 1e-12]
        self.max_iter = [50, 100, 250, 500]

    def test_prediction_labels(self):

        params = product(self.all_scores, self.methods,
                         self.tol, self.max_iter)

        for scores, method, tol, max_iter in params:

            self.thres = MIXMOD(method=method, tol=tol, max_iter=max_iter)
            pred_labels = self.thres.eval(scores)

            # fitted attributes must be set
            assert (self.thres.thresh_ is not None)
            assert (self.thres.dscores_ is not None)
            assert (self.thres.mixture_ is not None)

            # decision scores are normalised to [0, 1]
            assert (self.thres.dscores_.min() == 0)
            assert (self.thres.dscores_.max() == 1)

            # the fitted mixture exposes its components, weights and parameters
            assert (self.thres.mixture_.components is not None)
            assert (self.thres.mixture_.weights is not None)
            assert (self.thres.mixture_.params is not None)

            # the mixture's callables return values for shifted scores
            nscores = self.thres.dscores_ + 1

            assert callable(self.thres.mixture_.loglikelihood)
            assert (self.thres.mixture_.loglikelihood(nscores) is not None)

            assert callable(self.thres.mixture_.pdf)
            assert (self.thres.mixture_.pdf(nscores) is not None)

            assert callable(self.thres.mixture_.posterior)
            assert (self.thres.mixture_.posterior(nscores) is not None)

            # labels are binary and match the training sample shape
            assert_equal(pred_labels.shape, self.y_train.shape)

            if (not np.all(pred_labels == 0)) & (not np.all(pred_labels == 1)):
                assert (pred_labels.min() == 0)
                assert (pred_labels.max() == 1)
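The attributes this test asserts on can also be inspected directly after a fit. A minimal sketch, with the parameter values taken from the test grid and the attribute names (thresh_, dscores_, and mixture_ with its weights and params) assumed from the assertions above:

# Sketch only: inspect the fitted MIXMOD attributes exercised by the test.
from pyod.models.knn import KNN
from pyod.utils.data import generate_data

from pythresh.thresholds.mixmod import MIXMOD

X_train, _, _, _ = generate_data(n_train=200, n_test=100,
                                 contamination=0.1, random_state=42)
scores = KNN().fit(X_train).decision_scores_

thres = MIXMOD(method='mean', tol=1e-5, max_iter=250)
labels = thres.eval(scores)

print('outliers flagged:', int(labels.sum()))
print('threshold       :', thres.thresh_)      # decision threshold on normalised scores
print('score range     :', thres.dscores_.min(), thres.dscores_.max())
print('mixture weights :', thres.mixture_.weights)
print('mixture params  :', thres.mixture_.params)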