Skip to content

Commit

Permalink
Merge pull request #59 from MannLabs/53-docs-utils
Browse files Browse the repository at this point in the history
#53 ADD docs for utils modules
  • Loading branch information
jalew188 committed Jun 18, 2024
2 parents 938c13d + dd785ae commit 42a86c8
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 14 deletions.
43 changes: 34 additions & 9 deletions alpharaw/utils/centroiding.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,59 @@
# Modified from AlphaPept
from typing import Tuple

import numpy as np
from numba import njit


@njit
def naive_centroid(
    peak_mzs: np.ndarray,
    peak_intensities: np.ndarray,
    centroiding_ppm: float = 20.0,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    A naive centroiding algorithm.

    Consecutive peaks are grouped by `_find_sister_peaks`; each group is
    collapsed into one centroid whose m/z is the intensity-weighted mean of
    the group and whose intensity is the sum of the group's intensities.

    Parameters
    ----------
    peak_mzs : np.ndarray
        peak m/z values to centroid.
        NOTE(review): presumably sorted in ascending order, since grouping
        walks the array from `start` onwards -- confirm against callers.

    peak_intensities : np.ndarray
        peak intensities to centroid, aligned index-by-index with `peak_mzs`.

    centroiding_ppm : float, optional
        The centroiding ppm, by default 20.0

    Returns
    -------
    Tuple
        ndarray: peak m/z array
        ndarray: peak intensity array
    """
    mz_list = []
    inten_list = []
    start, stop = 0, 1
    # Per-peak absolute tolerance: twice the ppm window at that peak's m/z.
    centroiding_peak_tols = 2 * peak_mzs * centroiding_ppm * 1e-6
    while start < len(peak_mzs):
        # `stop` is the exclusive end index of the group beginning at `start`.
        stop = _find_sister_peaks(peak_mzs, centroiding_peak_tols, start)
        mz_list.append(
            np.average(peak_mzs[start:stop], weights=peak_intensities[start:stop])
        )
        inten_list.append(np.sum(peak_intensities[start:stop]))
        # Continue with the first peak that was not absorbed into this group.
        start = stop
    return (
        np.array(mz_list, dtype=peak_mzs.dtype),
        np.array(inten_list, dtype=peak_intensities.dtype),
    )


@njit
def _find_sister_peaks(peak_mzs, centroiding_peak_tols, start):
def _find_sister_peaks(
peak_mzs: np.ndarray, centroiding_peak_tols: np.ndarray, start: int
):
"""
Find sister peak stop idx for the given start idx.
Sister peaks refers to peaks from the same ion in profile mode.
Internal function.
"""
stop = start + 1
for i in range(start + 1, len(peak_mzs)):
if peak_mzs[i] - peak_mzs[start] <= centroiding_peak_tols[start]:
Expand Down
30 changes: 25 additions & 5 deletions alpharaw/utils/ms_path_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os

_special_ms_exts: list = [
_SPECIAL_MS_EXTS: list = [
".ms_data.hdf", # alphapept
".raw.hdf", # alpharaw
".raw.hdf5", # alpharaw
Expand All @@ -14,10 +14,30 @@
]


def get_raw_name(ms_file: str, special_ms_exts: list = _special_ms_exts):
def get_raw_name(ms_file: str) -> str:
"""
Get `raw_name` (base name of RAW data file) from the MS file path
by removing the extensions defined in :data:`_SPECIAL_MS_EXTS`.
Parameters
----------
ms_file : str
The absolute or relative path of the RAW file.
Returns
-------
str
The `raw_name` without extension.
Examples
--------
>>> get_raw_name("/MS/files/your_raw_name.raw")
'your_raw_name'
"""
raw_name = os.path.basename(ms_file)
lower_name = raw_name.lower()
for _ext in special_ms_exts:
for _ext in _SPECIAL_MS_EXTS:
if lower_name.endswith(_ext.lower()):
raw_name = raw_name[: -len(_ext)]
break
Expand All @@ -27,7 +47,7 @@ def get_raw_name(ms_file: str, special_ms_exts: list = _special_ms_exts):


def parse_ms_files_to_dict(
    ms_file_list: list,
) -> dict:
    """
    Parse spectrum file paths into a dict keyed by `raw_name`.

    Parameters
    ----------
    ms_file_list : list
        Paths of the MS files to index.

    Returns
    -------
    dict
        Maps each `raw_name` (as returned by :func:`get_raw_name`) to the
        file path it was derived from. If several paths yield the same
        `raw_name`, the last one in `ms_file_list` wins.
    """
    # `get_raw_name` accepts only the file path; passing the extension list
    # as a second positional argument raises TypeError.
    return {get_raw_name(ms_file): ms_file for ms_file in ms_file_list}

0 comments on commit 42a86c8

Please sign in to comment.