Skip to content

Commit

Permalink
fix: use pseudo-inverse (pinv) instead of inv
Browse files: browse the repository at this point in the history
Signed-off-by: Avik Basu <[email protected]>
  • Loading branch information
ab93 committed Jul 26, 2023
1 parent 9dfe615 commit 22a8ec9
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 13 deletions.
23 changes: 11 additions & 12 deletions numalogic/models/threshold/_mahalanobis.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,12 @@ def __init__(self, max_outlier_prob: float = 0.1):
self._is_fitted = False

@property
def threshold(self):
def threshold(self) -> float:
"""Returns the threshold value."""
return self._md_thresh

@property
def std_factor(self):
def std_factor(self) -> float:
"""Returns the k value calculated using Chebyshev's inequality."""
return self._k

Expand All @@ -90,15 +90,15 @@ def fit(self, x: npt.NDArray[float]) -> Self:
Args:
----
x: training data
x: training data of shape (n_samples, n_features)
Returns
-------
self
"""
self._distr_mean = np.mean(x, axis=0)
cov = np.cov(x, rowvar=False)
self._cov_inv = np.linalg.inv(cov)
self._cov_inv = np.linalg.pinv(cov)
mahal_dist = self.mahalanobis(x)
self._md_thresh = np.mean(mahal_dist) + self._k * np.std(mahal_dist)
self._is_fitted = True
Expand All @@ -110,11 +110,11 @@ def mahalanobis(self, x: npt.NDArray[float]) -> npt.NDArray[float]:
Args:
----
x: input data
x: input data of shape (n_samples, n_features)
Returns
-------
Mahalanobis distance vector
Mahalanobis distance vector of shape (n_samples,)
"""
x_distance = x - self._distr_mean
mahal_grid = x_distance @ self._cov_inv @ x_distance.T
Expand All @@ -127,11 +127,11 @@ def predict(self, x: npt.NDArray[float]) -> npt.NDArray[int]:
Args:
----
x: input data
x: input data of shape (n_samples, n_features)
Returns
-------
Integer Array of 0s and 1s
Integer Array of shape (n_samples,)
Raises
------
Expand All @@ -140,8 +140,7 @@ def predict(self, x: npt.NDArray[float]) -> npt.NDArray[int]:
if not self._is_fitted:
raise ModelInitializationError("Model not fitted yet.")
md = self.mahalanobis(x)
y_hat = np.zeros_like(x)
y_hat[md < self._md_thresh] = _INLIER
y_hat = np.zeros(x.shape[0], dtype=int)
y_hat[md >= self._md_thresh] = _OUTLIER
return y_hat

Expand All @@ -154,11 +153,11 @@ def score_samples(self, x: npt.NDArray[float]) -> npt.NDArray[float]:
Args:
----
x: input data
x: input data of shape (n_samples, n_features)
Returns
-------
Outlier score for each sample
Outlier score vector of shape (n_samples,)
Raises
------
Expand Down
8 changes: 7 additions & 1 deletion tests/models/test_threshold.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,17 @@ def test_init_err(self):
with self.assertRaises(ValueError):
MahalanobisThreshold(max_outlier_prob=1.0)

def test_singular(self):
    """Fit on constant data and check the shape of the distance vector.

    An all-ones training matrix has an all-zero covariance matrix, which
    is singular, so this exercises the singular-covariance path of fit().
    """
    n_features = 15
    model = MahalanobisThreshold()
    model.fit(np.ones((100, n_features)))
    distances = model.mahalanobis(np.ones((30, n_features)))
    # One distance per input row is expected.
    self.assertTupleEqual((30,), distances.shape)

def test_predict(self):
clf = MahalanobisThreshold()
clf.fit(self.x_train)
y = clf.predict(self.x_test)
self.assertTupleEqual(self.x_test.shape, y.shape)
self.assertTupleEqual((self.x_test.shape[0],), y.shape)
self.assertEqual(np.max(y), 1)
self.assertEqual(np.min(y), 0)

Expand Down

0 comments on commit 22a8ec9

Please sign in to comment.