Fix pre-commit checks and update type annotations for the new anndata version

wilhelm-lab · Sep 10, 2024 · caf7084 · caf7084
1 parent 7c8771e
commit caf7084
Show file tree

Hide file tree

Showing 3 changed files with 24 additions and 18 deletions.
diff --git a/oktoberfest/data/spectra.py b/oktoberfest/data/spectra.py
@@ -1,7 +1,9 @@
+from __future__ import annotations
+
 import logging
 from enum import Enum
 from pathlib import Path
-from typing import List, Optional, Tuple, Type, TypeVar, Union
+from typing import TYPE_CHECKING, TypeVar
 
 import anndata
 import numpy as np
@@ -10,6 +12,9 @@
 import spectrum_fundamentals.constants as c
 from scipy.sparse import csr_matrix, dok_matrix
 
+if TYPE_CHECKING:
+    from anndata.compat import Index
+
 logger = logging.getLogger(__name__)
 
 
@@ -37,7 +42,7 @@ class Spectra(anndata.AnnData):
     MAX_CHARGE = 3
 
     @staticmethod
-    def _gen_vars_df(specified_ion_types: Optional[List[str]] = None) -> pd.DataFrame:
+    def _gen_vars_df(specified_ion_types: list[str] | None = None) -> pd.DataFrame:
         """
         Creates Annotation dataframe for vars in AnnData object.
 
@@ -61,7 +66,7 @@ def _gen_vars_df(specified_ion_types: Optional[List[str]] = None) -> pd.DataFram
         return var_df
 
     @staticmethod
-    def _gen_column_names(fragment_type: FragmentType) -> List[str]:
+    def _gen_column_names(fragment_type: FragmentType) -> list[str]:
         """
         Get column names of the spectra data.
 
@@ -108,12 +113,12 @@ def _resolve_layer_name(fragment_type: FragmentType) -> str:
             layer = Spectra.MZ_LAYER_NAME
         return layer
 
-    def __getitem__(self, index: anndata._core.index.Index):
+    def __getitem__(self, index: Index):
         """Returns a sliced view of the object with this type to avoid returning AnnData instances when slicing."""
         oidx, vidx = self._normalize_indices(index)
         return Spectra(self, oidx=oidx, vidx=vidx, asview=True)
 
-    def add_column(self, data: Union[np.ndarray, pd.Series], name: Optional[str] = None) -> None:
+    def add_column(self, data: np.ndarray | pd.Series, name: str | None = None) -> None:
         """
         Add column to spectra data.
 
@@ -192,9 +197,9 @@ def add_intensities(self, intensities: np.ndarray, annotation: np.ndarray, fragm
 
     def add_list_of_predicted_intensities(
         self,
-        intensities: List[np.ndarray],
-        annotations: List[np.ndarray],
-        chunk_indices: List[np.ndarray],
+        intensities: list[np.ndarray],
+        annotations: list[np.ndarray],
+        chunk_indices: list[np.ndarray],
     ):
         """
         Add chunks of predicted intensities and convert to sparse matrix.
@@ -253,7 +258,7 @@ def _add_predicted_intensites(
 
         # self.obs.iloc[index]["done"] = True
 
-    def get_matrix(self, fragment_type: FragmentType) -> Tuple[csr_matrix, List[str]]:
+    def get_matrix(self, fragment_type: FragmentType) -> tuple[csr_matrix, list[str]]:
         """
         Get intensities sparse matrix from AnnData object.
 
@@ -268,7 +273,7 @@ def get_matrix(self, fragment_type: FragmentType) -> Tuple[csr_matrix, List[str]
 
         return matrix, self._gen_column_names(fragment_type)
 
-    def write_as_hdf5(self, output_file: Union[str, Path]):
+    def write_as_hdf5(self, output_file: str | Path):
         """
         Write spectra_data to hdf5 file.
 
@@ -277,7 +282,7 @@ def write_as_hdf5(self, output_file: Union[str, Path]):
         self.write(output_file, compression="gzip")
 
     @classmethod
-    def from_hdf5(cls: Type[SpectraT], input_file: Union[str, Path]) -> SpectraT:
+    def from_hdf5(cls: type[SpectraT], input_file: str | Path) -> SpectraT:
         """
         Read from hdf5 file.
 

diff --git a/tests/unit_tests/data/quantification/example.fasta b/tests/unit_tests/data/quantification/example.fasta
@@ -46,4 +46,4 @@ DNLTLWTSENQGDEGDAGEGEN
 >GENSCAN00000033129 pep:genscan chromosome:GRCh38:17:1345447:1365026:-1 transcript:GENSCAN00000033129 transcript_biotype:protein_codingMDVELTVEERNLLSVAYKNVIGARRASWRIISSIEQKEENKGGEDKLKMIREYRQMVETE
 LKLICCDILDVLDKHLIPAANTGESKVFYYKMKGDYHRYLAEFATGNDRKEAAENSLVAY
 KAASDIAMTELPPTHPIRLGLALNFSVFYYEILNSPDRACRLAKAAFDDAIAELDTLSEE
-SYKDSTLIMQLLRDNLTLWTSDMQGDGEEQNKEALQDVEDENQ
+SYKDSTLIMQLLRDNLTLWTSDMQGDGEEQNKEALQDVEDENQ
diff --git a/tests/unit_tests/test_utils.py b/tests/unit_tests/test_utils.py
@@ -1,5 +1,6 @@
 import unittest
 from pathlib import Path
+
 import pandas as pd
 
 from oktoberfest.utils import Config, JobPool, ProcessStep
@@ -48,9 +49,7 @@ def test_picked_group_fdr_maxquant(self):
         config.search_results_type = "maxquant"
         config.output = Path("./tests/unit_tests/data/quantification")
         config.fdr_estimation_method = "percolator"
-        config.inputs = {
-            "library_input": Path("./tests/unit_tests/data/quantification/example.fasta")
-        }
+        config.inputs = {"library_input": Path("./tests/unit_tests/data/quantification/example.fasta")}
         config.inputs["library_input"]
         config.fasta_digest_options = {
             "digestion": "full",
@@ -62,9 +61,11 @@ def test_picked_group_fdr_maxquant(self):
             "db": "target",
         }
         apply_quant(config)
-        compare = pd.read_csv("./tests/unit_tests/data/quantification/mq_proteinGroups.txt",sep="\t")
-        results = pd.read_csv("./tests/unit_tests/data/quantification/picked_group_fdr/rescore.proteinGroups.txt",sep="\t")
-        pd.testing.assert_frame_equal(results,compare)
+        compare = pd.read_csv("./tests/unit_tests/data/quantification/mq_proteinGroups.txt", sep="\t")
+        results = pd.read_csv(
+            "./tests/unit_tests/data/quantification/picked_group_fdr/rescore.proteinGroups.txt", sep="\t"
+        )
+        pd.testing.assert_frame_equal(results, compare)
 
     def test_picked_group_fdr_sage(self):
         """Testing picked_group_fdr quantification with sage search results."""