Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files Browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed May 26, 2024
1 parent 3af7d89 commit 3fe911b
Showing 1 changed file with 30 additions and 100 deletions.
130 changes: 30 additions & 100 deletions pertpy/tools/_distances/_distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,10 +289,7 @@ def pairwise(
# able to handle precomputed distances such as the PseudobulkDistance.
if self.metric_fct.accepts_precomputed:
# Precompute the pairwise distances if needed
if (
f"{self.obsm_key}_{self.cell_wise_metric}_predistances"
not in adata.obsp.keys()
):
if f"{self.obsm_key}_{self.cell_wise_metric}_predistances" not in adata.obsp.keys():
self.precompute_distances(adata, n_jobs=n_jobs, **kwargs)
pwd = adata.obsp[f"{self.obsm_key}_{self.cell_wise_metric}_predistances"]
for index_x, group_x in enumerate(fct(groups)):
Expand All @@ -305,9 +302,7 @@ def pairwise(
# subset the pairwise distance matrix to the two groups
sub_pwd = pwd[idx_x | idx_y, :][:, idx_x | idx_y]
sub_idx = grouping[idx_x | idx_y] == group_x
dist = self.metric_fct.from_precomputed(
sub_pwd, sub_idx, **kwargs
)
dist = self.metric_fct.from_precomputed(sub_pwd, sub_idx, **kwargs)
df.loc[group_x, group_y] = dist
df.loc[group_y, group_x] = dist
else:
Expand Down Expand Up @@ -385,10 +380,7 @@ def onesided_distances(
# able to handle precomputed distances such as the PseudobulkDistance.
if self.metric_fct.accepts_precomputed:
# Precompute the pairwise distances if needed
if (
f"{self.obsm_key}_{self.cell_wise_metric}_predistances"
not in adata.obsp.keys()
):
if f"{self.obsm_key}_{self.cell_wise_metric}_predistances" not in adata.obsp.keys():
self.precompute_distances(adata, n_jobs=n_jobs, **kwargs)
pwd = adata.obsp[f"{self.obsm_key}_{self.cell_wise_metric}_predistances"]
for group_x in fct(groups):
Expand Down Expand Up @@ -442,9 +434,7 @@ def precompute_distances(self, adata: AnnData, n_jobs: int = -1) -> None:
cells = adata.layers[self.layer_key]
else:
cells = adata.obsm[self.obsm_key].copy()
pwd = pairwise_distances(
cells, cells, metric=self.cell_wise_metric, n_jobs=n_jobs
)
pwd = pairwise_distances(cells, cells, metric=self.cell_wise_metric, n_jobs=n_jobs)
adata.obsp[f"{self.obsm_key}_{self.cell_wise_metric}_predistances"] = pwd


Expand Down Expand Up @@ -513,9 +503,7 @@ def __init__(self) -> None:
super().__init__()
self.accepts_precomputed = False

def __call__(
self, X: np.ndarray, Y: np.ndarray, kernel="linear", **kwargs
) -> float:
def __call__(self, X: np.ndarray, Y: np.ndarray, kernel="linear", **kwargs) -> float:
if kernel == "linear":
XX = np.dot(X, X.T)
YY = np.dot(Y, Y.T)
Expand Down Expand Up @@ -570,9 +558,7 @@ def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
return np.linalg.norm(X.mean(axis=0) - Y.mean(axis=0), ord=2, **kwargs)

def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"EuclideanDistance cannot be called on a pairwise distance matrix."
)
raise NotImplementedError("EuclideanDistance cannot be called on a pairwise distance matrix.")


class MeanSquaredDistance(AbstractDistance):
Expand All @@ -583,15 +569,10 @@ def __init__(self) -> None:
self.accepts_precomputed = False

def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
return (
np.linalg.norm(X.mean(axis=0) - Y.mean(axis=0), ord=2, **kwargs) ** 2
/ X.shape[1]
)
return np.linalg.norm(X.mean(axis=0) - Y.mean(axis=0), ord=2, **kwargs) ** 2 / X.shape[1]

def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"MeanSquaredDistance cannot be called on a pairwise distance matrix."
)
raise NotImplementedError("MeanSquaredDistance cannot be called on a pairwise distance matrix.")


class MeanAbsoluteDistance(AbstractDistance):
Expand All @@ -602,15 +583,10 @@ def __init__(self) -> None:
self.accepts_precomputed = False

def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
return (
np.linalg.norm(X.mean(axis=0) - Y.mean(axis=0), ord=1, **kwargs)
/ X.shape[1]
)
return np.linalg.norm(X.mean(axis=0) - Y.mean(axis=0), ord=1, **kwargs) / X.shape[1]

def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"MeanAbsoluteDistance cannot be called on a pairwise distance matrix."
)
raise NotImplementedError("MeanAbsoluteDistance cannot be called on a pairwise distance matrix.")


class MeanPairwiseDistance(AbstractDistance):
Expand Down Expand Up @@ -640,9 +616,7 @@ def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
return 1 - pearsonr(X.mean(axis=0), Y.mean(axis=0))[0]

def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"PearsonDistance cannot be called on a pairwise distance matrix."
)
raise NotImplementedError("PearsonDistance cannot be called on a pairwise distance matrix.")


class SpearmanDistance(AbstractDistance):
Expand All @@ -656,9 +630,7 @@ def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
return 1 - spearmanr(X.mean(axis=0), Y.mean(axis=0))[0]

def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"SpearmanDistance cannot be called on a pairwise distance matrix."
)
raise NotImplementedError("SpearmanDistance cannot be called on a pairwise distance matrix.")


class KendallTauDistance(AbstractDistance):
Expand All @@ -676,9 +648,7 @@ def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
return tau_dist

def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"KendallTauDistance cannot be called on a pairwise distance matrix."
)
raise NotImplementedError("KendallTauDistance cannot be called on a pairwise distance matrix.")


class CosineDistance(AbstractDistance):
Expand All @@ -692,9 +662,7 @@ def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
return cosine(X.mean(axis=0), Y.mean(axis=0))

def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"CosineDistance cannot be called on a pairwise distance matrix."
)
raise NotImplementedError("CosineDistance cannot be called on a pairwise distance matrix.")


class R2ScoreDistance(AbstractDistance):
Expand All @@ -710,9 +678,7 @@ def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
return 1 - r2_score(X.mean(axis=0), Y.mean(axis=0))

def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"R2ScoreDistance cannot be called on a pairwise distance matrix."
)
raise NotImplementedError("R2ScoreDistance cannot be called on a pairwise distance matrix.")


class SymmetricKLDivergence(AbstractDistance):
Expand All @@ -733,23 +699,13 @@ def __call__(self, X: np.ndarray, Y: np.ndarray, epsilon=1e-8, **kwargs) -> floa
for i in range(X.shape[1]):
x_mean, x_std = X[:, i].mean(), X[:, i].std() + epsilon
y_mean, y_std = Y[:, i].mean(), Y[:, i].std() + epsilon
kl = (
np.log(y_std / x_std)
+ (x_std**2 + (x_mean - y_mean) ** 2) / (2 * y_std**2)
- 1 / 2
)
klr = (
np.log(x_std / y_std)
+ (y_std**2 + (y_mean - x_mean) ** 2) / (2 * x_std**2)
- 1 / 2
)
kl = np.log(y_std / x_std) + (x_std**2 + (x_mean - y_mean) ** 2) / (2 * y_std**2) - 1 / 2
klr = np.log(x_std / y_std) + (y_std**2 + (y_mean - x_mean) ** 2) / (2 * x_std**2) - 1 / 2
kl_all.append(kl + klr)
return sum(kl_all) / len(kl_all)

def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"SymmetricKLDivergence cannot be called on a pairwise distance matrix."
)
raise NotImplementedError("SymmetricKLDivergence cannot be called on a pairwise distance matrix.")


class TTestDistance(AbstractDistance):
Expand All @@ -773,9 +729,7 @@ def __call__(self, X: np.ndarray, Y: np.ndarray, epsilon=1e-8, **kwargs) -> floa
return sum(t_test_all) / len(t_test_all)

def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"TTestDistance cannot be called on a pairwise distance matrix."
)
raise NotImplementedError("TTestDistance cannot be called on a pairwise distance matrix.")


class KSTestDistance(AbstractDistance):
Expand All @@ -792,9 +746,7 @@ def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
return sum(stats) / len(stats)

def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"KSTestDistance cannot be called on a pairwise distance matrix."
)
raise NotImplementedError("KSTestDistance cannot be called on a pairwise distance matrix.")


class NBLL(AbstractDistance):
Expand All @@ -809,9 +761,7 @@ def __init__(self) -> None:

def __call__(self, X: np.ndarray, Y: np.ndarray, epsilon=1e-8, **kwargs) -> float:
def _is_count_matrix(matrix, tolerance=1e-6):
if matrix.dtype.kind == "i" or np.all(
np.abs(matrix - np.round(matrix)) < tolerance
):
if matrix.dtype.kind == "i" or np.all(np.abs(matrix - np.round(matrix)) < tolerance):
return True
else:
return False
Expand All @@ -820,9 +770,7 @@ def _is_count_matrix(matrix, tolerance=1e-6):
raise ValueError("NBLL distance only works for raw counts.")

@numba.jit(forceobj=True)
def _compute_nll(
y: np.ndarray, nb_params: tuple[float, float], epsilon: float
) -> float:
def _compute_nll(y: np.ndarray, nb_params: tuple[float, float], epsilon: float) -> float:
mu = np.exp(nb_params[0])
theta = 1 / nb_params[1]
eps = epsilon
Expand Down Expand Up @@ -858,16 +806,12 @@ def _process_gene(x: np.ndarray, y: np.ndarray, epsilon: float) -> float:
nlls.append(nll)

if genes_skipped > X.shape[1] / 2:
raise AttributeError(
f"{genes_skipped} genes could not be fit, which is over half."
)
raise AttributeError(f"{genes_skipped} genes could not be fit, which is over half.")

return -np.sum(nlls) / len(nlls)

def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"NBLL cannot be called on a pairwise distance matrix."
)
raise NotImplementedError("NBLL cannot be called on a pairwise distance matrix.")


def _sample(X, frac=None, n=None):
Expand Down Expand Up @@ -909,9 +853,7 @@ def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
return np.mean(test_labels[:, 1])

def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"ClassifierProbaDistance cannot be called on a pairwise distance matrix."
)
raise NotImplementedError("ClassifierProbaDistance cannot be called on a pairwise distance matrix.")


class ClassifierClassProjection(AbstractDistance):
Expand All @@ -925,9 +867,7 @@ def __init__(self) -> None:
self.accepts_precomputed = False

def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"ClassifierClassProjection can currently only be called with onesided."
)
raise NotImplementedError("ClassifierClassProjection can currently only be called with onesided.")

def onesided_distances(
self,
Expand Down Expand Up @@ -966,9 +906,7 @@ def onesided_distances(
return df

def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"ClassifierClassProjection cannot be called on a pairwise distance matrix."
)
raise NotImplementedError("ClassifierClassProjection cannot be called on a pairwise distance matrix.")


class MeanVarDistnDistance(AbstractDistance):
Expand Down Expand Up @@ -1026,11 +964,7 @@ def grid_points(d, n_points=100):
def kde_eval(d, grid):
# Kernel choice: Gaussian smooths too much, and cosine or other kernels that do not stretch out
# cannot be compared well in regions further away from the data, as they evaluate to -inf there
return (
KernelDensity(bandwidth="silverman", kernel="exponential")
.fit(d)
.score_samples(grid)
)
return KernelDensity(bandwidth="silverman", kernel="exponential").fit(d).score_samples(grid)

kde_x = kde_eval(x, grid)
kde_y = kde_eval(y, grid)
Expand All @@ -1041,9 +975,7 @@ def kde_eval(d, grid):
return kde_diff

def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"MeanVarDistnDistance cannot be called on a pairwise distance matrix."
)
raise NotImplementedError("MeanVarDistnDistance cannot be called on a pairwise distance matrix.")


class MahalanobisDistance(AbstractDistance):
Expand All @@ -1065,6 +997,4 @@ def __call__(self, X: np.ndarray, Y: np.ndarray, **kwargs) -> float:
)

def from_precomputed(self, P: np.ndarray, idx: np.ndarray, **kwargs) -> float:
raise NotImplementedError(
"Mahalanobis cannot be called on a pairwise distance matrix."
)
raise NotImplementedError("Mahalanobis cannot be called on a pairwise distance matrix.")

0 comments on commit 3fe911b

Please sign in to comment.