From 1e1b10b855a62c106d192f8e726211206b8b372f Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Thu, 13 Jun 2024 21:41:31 +0200
Subject: [PATCH 1/4] #53 ADD docs for psm_match.py

---
 alpharaw/match/psm_match.py           | 197 +++++++++++++++++---------
 alpharaw/match/psm_match_alphatims.py |   5 +-
 2 files changed, 134 insertions(+), 68 deletions(-)

diff --git a/alpharaw/match/psm_match.py b/alpharaw/match/psm_match.py
index a13de5c..d544389 100644
--- a/alpharaw/match/psm_match.py
+++ b/alpharaw/match/psm_match.py
@@ -30,6 +30,25 @@
 class PepSpecMatch:
     """
     Extract fragment ions from MS2 data.
+
+    Parameters
+    ----------
+    charged_frag_types : list, optional
+        fragment types with charge states,
+        e.g. ['b_z1', 'y_z2', 'b_modloss_z1', 'y_H2O_z2'].
+        If None, it is `get_charged_frag_types(['b','y','b_modloss','y_modloss'], 2)`.
+        By default None.
+
+    match_closest : bool, optional
+        if True, match the closest peak for a m/z;
+        if False, matched the higest peak for a m/z in the tolerance range.
+        By default True.
+
+    use_ppm : bool, optional
+        If use ppm, by default True.
+
+    tol_value : float, optional
+        tolerance value, by default 20.0
     """
 
     match_closest: bool = True
@@ -45,25 +64,6 @@ def __init__(
         use_ppm: bool = True,
         tol_value: float = 20.0,
     ):
-        """
-        Parameters
-        ----------
-        charged_frag_types : list, optional
-            fragment types with charge states,
-            e.g. ['b_z1', 'y_z2', 'b_modloss_z1', 'y_H2O_z2'].
-            By default `get_charged_frag_types(['b','y','b_modloss','y_modloss'], 2)`
-
-        match_closest : bool, optional
-            if True, match the closest peak for a m/z;
-            if False, matched the higest peak for a m/z in the tolerance range.
-            By default True
-
-        use_ppm : bool, optional
-            If use ppm, by default True
-
-        tol_value : float, optional
-            tolerance value, by default 20.0
-        """
         self.charged_frag_types = (
             get_charged_frag_types(["b", "y", "b_modloss", "y_modloss"], 2)
             if charged_frag_types is None
@@ -73,17 +73,41 @@ def __init__(
         self.use_ppm = use_ppm
         self.tolerance = tol_value
 
-    def _preprocess_psms(self, psm_df):
-        pass
+    def get_fragment_mz_df(self) -> pd.DataFrame:
+        """
+        Call :func:`alphabase.peptide.fragment.create_fragment_mz_dataframe`
+        for :attr:`PepSpecMatch.psm_df` and :attr:`PepSpecMatch.charged_frag_types`.
 
-    def get_fragment_mz_df(self):
+
+        Returns
+        -------
+        DataFrame
+            _description_
+        """
         return create_fragment_mz_dataframe(
             self.psm_df,
             self.charged_frag_types,
             dtype=PEAK_MZ_DTYPE,
         )
 
-    def _add_missing_columns_to_psm_df(self, psm_df: pd.DataFrame, raw_data=None):
+    def _add_missing_columns_to_psm_df(
+        self, psm_df: pd.DataFrame, raw_data: MSData_Base = None
+    ):
+        """
+        Add missing "rt", "nce", "rt_norm", ("mobility") columns to `psm_df` if missing.
+
+        Parameters
+        ----------
+        psm_df : pd.DataFrame
+            psm dataframe to be processed.
+        raw_data : MSData_Base, optional
+            The `MSData_Base` object. If None, `self.raw_data` is used. By default None.
+
+        Returns
+        -------
+        DataFrame
+            psm_df inplace.
+        """
         if raw_data is None:
             raw_data = self.raw_data
         add_spec_info_list = []
@@ -117,7 +141,19 @@ def _add_missing_columns_to_psm_df(self, psm_df: pd.DataFrame, raw_data=None):
         #     psm_df['rt_sec'] = psm_df.rt*60
         return psm_df
 
-    def _prepare_matching_dfs(self):
+    def _prepare_matching_dfs(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+        """
+        Prepare dataframes to be matched.
+
+        Returns
+        -------
+        Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]
+            pd.DataFrame: fragment mz dataframe.
+
+            pd.DataFrame: intensity dataframe to match.
+
+            pd.DataFrame: mz error dataframe to match.
+        """
         fragment_mz_df = self.get_fragment_mz_df()
 
         matched_intensity_df = pd.DataFrame(
@@ -138,17 +174,17 @@ def load_ms_data(
         process_count: int = 8,
         **kwargs,
     ):
-        """Load MS files
+        """Load MS file to set `self.raw_data`.
 
         Parameters
         ----------
         ms_file : str | MSData_Base
-            ms2 file path
+            ms2 file path.
 
         ms_file_type : str, optional
             ms2 file type, could be
             ["alpharaw_hdf","thermo","sciex","alphapept_hdf","mgf"].
-            Default to 'alpharaw_hdf'
+            Default to 'alpharaw_hdf'.
         """
         self.raw_data = load_ms_data(ms_file, ms_file_type, process_count=process_count)
 
@@ -157,18 +193,39 @@ def get_peaks(self, spec_idx: int, **kwargs):
 
     def _match_one_psm(
         self,
-        spec_mzs: np.ndarray,
-        spec_intens: np.ndarray,
+        peak_mzs: np.ndarray,
+        peak_intens: np.ndarray,
         fragment_mz_df: pd.DataFrame,
         matched_intensity_df: pd.DataFrame,
         matched_mz_err_df: pd.DataFrame,
         frag_start_idx: int,
         frag_stop_idx: int,
     ):
-        if len(spec_mzs) == 0:
+        """
+        Match fragments of one precursor (located by `frag_start_idx` and `frag_stop_idx`)
+        against the corresponding `peak_mzs`.
+
+        Parameters
+        ----------
+        peak_mzs : np.ndarray
+            Peak m/z values to be matched.
+        peak_intens : np.ndarray
+            Peak intensities to be matched.
+        fragment_mz_df : pd.DataFrame
+            fragment m/z dataframe to be matched.
+        matched_intensity_df : pd.DataFrame
+            The dataframe to store matched intensity values.
+        matched_mz_err_df : pd.DataFrame
+            The dataframe to store matched mz error values.
+        frag_start_idx : int
+            fragment start index of the given PSM.
+        frag_stop_idx : int
+            fragment stop index of the given PSM.
+        """
+        if len(peak_mzs) == 0:
             return
 
-        spec_mzs = spec_mzs.astype(PEAK_MZ_DTYPE)
+        peak_mzs = peak_mzs.astype(PEAK_MZ_DTYPE)
 
         frag_mzs = fragment_mz_df.values[frag_start_idx:frag_stop_idx, :]
 
@@ -179,20 +236,20 @@ def _match_one_psm(
 
         if self.match_closest:
             matched_idxes = match_closest_peaks(
-                spec_mzs, spec_intens, frag_mzs, mz_tols
+                peak_mzs, peak_intens, frag_mzs, mz_tols
             )
         else:
             matched_idxes = match_highest_peaks(
-                spec_mzs,
-                spec_intens,
+                peak_mzs,
+                peak_intens,
                 frag_mzs,
                 mz_tols,
             )
 
-        matched_intens = spec_intens[matched_idxes]
+        matched_intens = peak_intens[matched_idxes]
         matched_intens[matched_idxes == -1] = 0
 
-        matched_mz_errs = np.abs(spec_mzs[matched_idxes] - frag_mzs)
+        matched_mz_errs = np.abs(peak_mzs[matched_idxes] - frag_mzs)
         matched_mz_errs[matched_idxes == -1] = np.inf
 
         matched_intensity_df.values[frag_start_idx:frag_stop_idx, :] = matched_intens
@@ -203,7 +260,7 @@ def match_ms2_one_raw(
         self,
         psm_df_one_raw: pd.DataFrame,
         verbose: bool = False,
-    ) -> tuple:
+    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
         """
         Matching psm_df_one_raw against self.raw_data
         after `self.load_ms_data()`
@@ -216,7 +273,7 @@ def match_ms2_one_raw(
 
         Returns
         -------
-        tuple:
+        Tuple:
             pd.DataFrame: psm dataframe with fragment index information.
 
             pd.DataFrame: fragment mz dataframe.
@@ -224,10 +281,8 @@ def match_ms2_one_raw(
             pd.DataFrame: matched intensity dataframe.
 
             pd.DataFrame: matched mass error dataframe.
-            np.inf if a fragment is not matched.
-
+              np.inf if a fragment is not matched.
         """
-        self._preprocess_psms(psm_df_one_raw)
         self.psm_df = psm_df_one_raw
 
         psm_df_one_raw = self._add_missing_columns_to_psm_df(
@@ -307,8 +362,9 @@ def match_ms2_multi_raw(
         ms_files: Union[dict, list],
         ms_file_type: str = "alpharaw_hdf",
         process_num: int = 1,
-    ):
-        """Matching PSM dataframe against the ms2 files in ms_files
+    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+        """
+        Matching PSM dataframe against the ms2 files in ms_files
         This method will store matched values as attributes:
         - self.psm_df
         - self.fragment_mz_df
@@ -330,7 +386,7 @@ def match_ms2_multi_raw(
 
         Returns
         -------
-        tuple:
+        Tuple:
             pd.DataFrame: psm dataframe with fragment index information.
 
             pd.DataFrame: fragment mz dataframe.
@@ -338,10 +394,9 @@ def match_ms2_multi_raw(
             pd.DataFrame: matched intensity dataframe.
 
             pd.DataFrame: matched mass error dataframe.
-            np.inf if a fragment is not matched.
+                np.inf if a fragment is not matched.
 
         """
-        self._preprocess_psms(psm_df)
         self.psm_df = psm_df
 
         (
@@ -425,7 +480,9 @@ def _prepare_matching_dfs(self):
 
         return (fragment_mz_df, matched_intensity_df, matched_mz_err_df)
 
-    def _match_ms2_one_raw_numba(self, raw_name, psm_df_one_raw):
+    def _match_ms2_one_raw_numba(
+        self, raw_name: str, psm_df_one_raw: pd.DataFrame
+    ) -> pd.DataFrame:
         psm_df_one_raw = psm_df_one_raw.reset_index(drop=True)
 
         if raw_name in self._ms_file_dict:
@@ -485,7 +542,7 @@ def match_ms2_multi_raw(
         ms_files: Tuple[dict, list],
         ms_file_type: str = "alpharaw_hdf",
         process_num: int = 8,
-    ):
+    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
         if isinstance(ms_files, list):
             ms_files = parse_ms_files_to_dict(ms_files)
         psm_df = psm_df[psm_df.raw_name.isin(ms_files)].reset_index(drop=True)
@@ -501,18 +558,18 @@ def match_ms2_multi_raw(
 
 @numba.jit(nogil=True)
 def match_one_raw_with_numba(
-    spec_idxes,
-    frag_start_idxes,
-    frag_stop_idxes,
-    all_frag_mzs,
-    all_frag_mz_tols,
-    all_spec_mzs,
-    all_spec_intensities,
-    peak_start_idxes,
-    peak_stop_idxes,
-    matched_intensities,
-    matched_mz_errs,
-    match_closest=True,
+    spec_idxes: np.ndarray,
+    frag_start_idxes: np.ndarray,
+    frag_stop_idxes: np.ndarray,
+    all_frag_mzs: np.ndarray,
+    all_frag_mz_tols: np.ndarray,
+    all_spec_mzs: np.ndarray,
+    all_spec_intensities: np.ndarray,
+    peak_start_idxes: np.ndarray,
+    peak_stop_idxes: np.ndarray,
+    matched_intensities: np.ndarray,
+    matched_mz_errs: np.ndarray,
+    match_closest: bool = True,
 ):
     """
     Internel function to match fragment mz values to spectrum mz values.
@@ -567,7 +624,8 @@ def load_ms_data(
     ms_file_type: str = "alpharaw_hdf",
     process_count: int = 8,
 ) -> MSData_Base:
-    """Load MS files
+    """
+    Load MS file.
 
     Parameters
     ----------
@@ -575,9 +633,14 @@ def load_ms_data(
         ms2 file path
 
     ms_file_type : str, optional
-        ms2 file type, could be
-        ["alpharaw_hdf","thermo","sciex","alphapept_hdf","mgf"].
-        Default to 'alpharaw_hdf'
+        ms2 file type, can be
+        ["alpharaw_hdf", "thermo", "sciex", "alphapept_hdf", "mgf"].
+        Default to 'alpharaw_hdf'.
+
+    Returns
+    -------
+    MSData_Base:
+        Instance of sub-class of `MSData_Base`.
     """
     if isinstance(ms_file, MSData_Base):
         return ms_file
@@ -600,6 +663,9 @@ def get_best_matched_intens(
     frag_start_idxes: np.ndarray,
     frag_stop_idxes: np.ndarray,
 ):
+    """
+    TODO Deprecated
+    """
     ret_intens = np.zeros(
         shape=matched_intensity_values.shape[1:], dtype=matched_intensity_values.dtype
     )
@@ -624,6 +690,9 @@ def get_ion_count_scores(
     frag_stop_idxes: np.ndarray,
     min_mz: float = 200,
 ):
+    """
+    TODO Deprecated
+    """
     scores = []
     for i in range(len(frag_start_idxes)):
         scores.append(
diff --git a/alpharaw/match/psm_match_alphatims.py b/alpharaw/match/psm_match_alphatims.py
index 876ba69..d2affc4 100644
--- a/alpharaw/match/psm_match_alphatims.py
+++ b/alpharaw/match/psm_match_alphatims.py
@@ -1,3 +1,4 @@
+# TODO to be removed, as this is already implemented in alphaDIA.
 from typing import Tuple, Union
 
 import numpy as np
@@ -6,9 +7,6 @@
 from alphatims.bruker import TimsTOF
 
 from alpharaw.ms_data_base import MSData_Base, ms_reader_provider
-from alpharaw.wrappers.alphapept_wrapper import (
-    AlphaPept_HDF_MS2_Reader,  # noqa: F401  # TODO remove import side effect
-)
 from alpharaw.wrappers.alphatims_wrapper import AlphaTimsWrapper
 
 from .psm_match import PepSpecMatch
@@ -231,7 +229,6 @@ def match_ms2_one_raw(
             np.inf if a fragment is not matched.
 
         """
-        self._preprocess_psms(psm_df_one_raw)
         self.psm_df = psm_df_one_raw
 
         psm_df_one_raw = self._add_missing_columns_to_psm_df(psm_df_one_raw)

From d10fa5e52b31f7b9caa01806722363501a22fcda Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Mon, 17 Jun 2024 12:41:28 +0200
Subject: [PATCH 2/4] #53 FIX docs for psm_match.py

---
 alpharaw/match/psm_match.py | 113 ++++++++++++++++++++++++++----------
 1 file changed, 82 insertions(+), 31 deletions(-)

diff --git a/alpharaw/match/psm_match.py b/alpharaw/match/psm_match.py
index d544389..814dfd2 100644
--- a/alpharaw/match/psm_match.py
+++ b/alpharaw/match/psm_match.py
@@ -30,25 +30,8 @@
 class PepSpecMatch:
     """
     Extract fragment ions from MS2 data.
-
-    Parameters
-    ----------
-    charged_frag_types : list, optional
-        fragment types with charge states,
-        e.g. ['b_z1', 'y_z2', 'b_modloss_z1', 'y_H2O_z2'].
-        If None, it is `get_charged_frag_types(['b','y','b_modloss','y_modloss'], 2)`.
-        By default None.
-
-    match_closest : bool, optional
-        if True, match the closest peak for a m/z;
-        if False, matched the higest peak for a m/z in the tolerance range.
-        By default True.
-
-    use_ppm : bool, optional
-        If use ppm, by default True.
-
-    tol_value : float, optional
-        tolerance value, by default 20.0
+    The extracted information can be used for visualization of peak annotation or
+    PeptDeep transfer learning for the MS2 model.
     """
 
     match_closest: bool = True
@@ -64,6 +47,26 @@ def __init__(
         use_ppm: bool = True,
         tol_value: float = 20.0,
     ):
+        """
+        Parameters
+        ----------
+        charged_frag_types : list, optional
+            fragment types with charge states,
+            e.g. ['b_z1', 'y_z2', 'b_modloss_z1', 'y_H2O_z2'].
+            Defaults to `get_charged_frag_types(['b','y','b_modloss','y_modloss'], 2)`.
+
+        match_closest : bool, optional
+            if True, match the closest peak for an m/z;
+            if False, match the highest peak for an m/z within the tolerance range.
+            By default True.
+
+        use_ppm : bool, optional
+            If True, the tolerance is in ppm; otherwise in Da. By default True.
+
+        tol_value : float, optional
+            Matching tolerance value (ppm or Da based on `use_ppm`)
+            for peak annotation, by default 20.0
+        """
         self.charged_frag_types = (
             get_charged_frag_types(["b", "y", "b_modloss", "y_modloss"], 2)
             if charged_frag_types is None
@@ -82,7 +85,7 @@ def get_fragment_mz_df(self) -> pd.DataFrame:
         Returns
         -------
         DataFrame
-            _description_
+            The fragment m/z dataframe in alphabase format.
         """
         return create_fragment_mz_dataframe(
             self.psm_df,
@@ -94,7 +97,7 @@ def _add_missing_columns_to_psm_df(
         self, psm_df: pd.DataFrame, raw_data: MSData_Base = None
     ):
         """
-        Add missing "rt", "nce", "rt_norm", ("mobility") columns to `psm_df` if missing.
+        Add missing "rt", "nce", "rt_norm", ("mobility") columns to `psm_df` inplace if missing.
 
         Parameters
         ----------
@@ -106,7 +109,7 @@ def _add_missing_columns_to_psm_df(
         Returns
         -------
         DataFrame
-            psm_df inplace.
+            The original `psm_df` with missing columns added.
         """
         if raw_data is None:
             raw_data = self.raw_data
@@ -179,7 +182,7 @@ def load_ms_data(
         Parameters
         ----------
         ms_file : str | MSData_Base
-            ms2 file path.
+            Absolute or relative path of the ms2 file.
 
         ms_file_type : str, optional
             ms2 file type, could be
@@ -194,7 +197,7 @@ def get_peaks(self, spec_idx: int, **kwargs):
     def _match_one_psm(
         self,
         peak_mzs: np.ndarray,
-        peak_intens: np.ndarray,
+        peak_intensities: np.ndarray,
         fragment_mz_df: pd.DataFrame,
         matched_intensity_df: pd.DataFrame,
         matched_mz_err_df: pd.DataFrame,
@@ -209,7 +212,7 @@ def _match_one_psm(
         ----------
         peak_mzs : np.ndarray
             Peak m/z values to be matched.
-        peak_intens : np.ndarray
+        peak_intensities : np.ndarray
             Peak intensities to be matched.
         fragment_mz_df : pd.DataFrame
             fragment m/z dataframe to be matched.
@@ -236,17 +239,17 @@ def _match_one_psm(
 
         if self.match_closest:
             matched_idxes = match_closest_peaks(
-                peak_mzs, peak_intens, frag_mzs, mz_tols
+                peak_mzs, peak_intensities, frag_mzs, mz_tols
             )
         else:
             matched_idxes = match_highest_peaks(
                 peak_mzs,
-                peak_intens,
+                peak_intensities,
                 frag_mzs,
                 mz_tols,
             )
 
-        matched_intens = peak_intens[matched_idxes]
+        matched_intens = peak_intensities[matched_idxes]
         matched_intens[matched_idxes == -1] = 0
 
         matched_mz_errs = np.abs(peak_mzs[matched_idxes] - frag_mzs)
@@ -442,6 +445,9 @@ def match_ms2_multi_raw(
 
 
 class PepSpecMatch_DIA(PepSpecMatch):
+    """
+    Peak annotation for DIA data.
+    """
     max_spec_per_query: int = 3
     min_frag_mz: float = 200.0
 
@@ -483,6 +489,22 @@ def _prepare_matching_dfs(self):
     def _match_ms2_one_raw_numba(
         self, raw_name: str, psm_df_one_raw: pd.DataFrame
     ) -> pd.DataFrame:
+        """
+        Internal method to extract peak information with numba as backend.
+
+        Parameters
+        ----------
+        raw_name : str
+            The raw name of the raw file. `psm_df_one_raw` dataframe should also
+            contain the same raw name in `raw_name` column.
+        psm_df_one_raw : pd.DataFrame
+            The dataframe for PSMs.
+
+        Returns
+        -------
+        pd.DataFrame
+            `psm_df_one_raw`
+        """
         psm_df_one_raw = psm_df_one_raw.reset_index(drop=True)
 
         if raw_name in self._ms_file_dict:
@@ -542,7 +564,36 @@ def match_ms2_multi_raw(
         ms_files: Tuple[dict, list],
         ms_file_type: str = "alpharaw_hdf",
         process_num: int = 8,
-    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+    ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+        """
+        Match peaks for the given `psm_df` against the corresponding MS spectrum files.
+
+        Parameters
+        ----------
+        psm_df : pd.DataFrame
+            Peptide-spectrum matches in alphabase dataframe format.
+        ms_files : Tuple[dict, list]
+            The absolute or relative paths of MS files.
+            if the type is `dict`, the format will be
+            `{'raw_name1': 'raw_name1.raw', ...}` if `ms_file_type` is `thermo_raw`.
+        ms_file_type : str, optional
+            MS file type that is already registered in
+            :obj:`alpharaw.ms_data_base.ms_reader_provider`.
+            By default "alpharaw_hdf".
+        process_num : int, optional
+            Match peaks by using multiprocessing, by default 8
+
+        Returns
+        -------
+        Tuple
+            pd.DataFrame: the `psm_df`.
+
+            pd.DataFrame: fragment m/z dataframe in alphabase format.
+
+            pd.DataFrame: the matched fragment intensity dataframe in alphabase format.
+
+            pd.DataFrame: the matched mass error in the same dataframe format.
+        """
         if isinstance(ms_files, list):
             ms_files = parse_ms_files_to_dict(ms_files)
         psm_df = psm_df[psm_df.raw_name.isin(ms_files)].reset_index(drop=True)
@@ -570,7 +621,7 @@ def match_one_raw_with_numba(
     matched_intensities: np.ndarray,
     matched_mz_errs: np.ndarray,
     match_closest: bool = True,
-):
+)->None:
     """
     Internel function to match fragment mz values to spectrum mz values.
     Matched_mz_errs[i] = np.inf if no peaks are matched.
@@ -625,7 +676,7 @@ def load_ms_data(
     process_count: int = 8,
 ) -> MSData_Base:
     """
-    Load MS file.
+    Load MS file and get `MSData_Base` object.
 
     Parameters
     ----------

From d243b641820e0c7683f9a63b11160bd4dead4017 Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Mon, 17 Jun 2024 12:42:50 +0200
Subject: [PATCH 3/4] #53 FIX pre-commit

---
 alpharaw/match/psm_match.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/alpharaw/match/psm_match.py b/alpharaw/match/psm_match.py
index 814dfd2..e058f27 100644
--- a/alpharaw/match/psm_match.py
+++ b/alpharaw/match/psm_match.py
@@ -448,6 +448,7 @@ class PepSpecMatch_DIA(PepSpecMatch):
     """
     Peak annotation for DIA data.
     """
+
     max_spec_per_query: int = 3
     min_frag_mz: float = 200.0
 
@@ -621,7 +622,7 @@ def match_one_raw_with_numba(
     matched_intensities: np.ndarray,
     matched_mz_errs: np.ndarray,
     match_closest: bool = True,
-)->None:
+) -> None:
     """
     Internel function to match fragment mz values to spectrum mz values.
     Matched_mz_errs[i] = np.inf if no peaks are matched.

From 5a9ce6be54e76f3aca02ffe00c71d2404901c962 Mon Sep 17 00:00:00 2001
From: jalew188 <jalew188@gmail.com>
Date: Tue, 18 Jun 2024 13:10:50 +0200
Subject: [PATCH 4/4] #53 FIX add more docs in psm_match.py

---
 alpharaw/match/psm_match.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/alpharaw/match/psm_match.py b/alpharaw/match/psm_match.py
index e058f27..0f5572c 100644
--- a/alpharaw/match/psm_match.py
+++ b/alpharaw/match/psm_match.py
@@ -146,7 +146,10 @@ def _add_missing_columns_to_psm_df(
 
     def _prepare_matching_dfs(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
         """
-        Prepare dataframes to be matched.
+        Prepare empty `fragment_mz_df`, `matched_intensity_df`,
+        and `matched_mz_err_df` dataframes to extract peak matching information
+        for `self.psm_df`. These three dataframes will only be used internally
+        in :class:`PepSpecMatch` objects.
 
         Returns
         -------