From 29d6d3534ba972302177309f594af2b49895fb7c Mon Sep 17 00:00:00 2001 From: jalew188 Date: Fri, 14 Jun 2024 23:20:45 +0200 Subject: [PATCH] #53 ADD docs for viz.df_utils.py --- alpharaw/viz/df_utils.py | 383 +++++++-- nbs/test_viz.ipynb | 1612 +------------------------------------- 2 files changed, 374 insertions(+), 1621 deletions(-) diff --git a/alpharaw/viz/df_utils.py b/alpharaw/viz/df_utils.py index 8c1ff91..7740c13 100644 --- a/alpharaw/viz/df_utils.py +++ b/alpharaw/viz/df_utils.py @@ -14,102 +14,202 @@ from alpharaw.match.match_utils import match_closest_peaks, match_highest_peaks -def make_psm_plot_df_for_peptide( - spec_masses: np.ndarray, - spec_intensities: np.ndarray, +def make_query_df_for_peptide( sequence: str, mods: str, mod_sites: str, charge: int, rt_sec: float = 0.0, mobility: float = 0.0, - ppm: float = 20.0, + ms_level: int = 2, charged_frag_types: list = ["b_z1", "b_z2", "y_z1", "y_z2"], include_fragments: bool = True, fragment_intensity_df: pd.DataFrame = None, include_precursor_isotopes: bool = False, max_isotope: int = 6, min_frag_mz: float = 100.0, - match_mode: typing.Literal["closest", "highest"] = "closest", + min_frag_intensity: float = 0.001, ) -> pd.DataFrame: - plot_df = make_query_df_for_peptide( + """ + Create a query dataframe for a peptide that can be + used to generate MS2 or XIC plots. + + Parameters + ---------- + sequence : str + Peptide sequence. + mods : str + Modification names in alphabase format. + mod_sites : str + Modification sites in alphabase format. + charge : int + Charge state of the peptide. + rt_sec : float, optional + Retention time in seconds of the peptide, by default 0.0 + mobility : float, optional + Ion mobility of the peptide, by default 0.0 + ms_level : int, optional + MS level. If it is 1, `include_fragments` is always False. 
By default 2 + charged_frag_types : list, optional + Charge states of fragments, by default ["b_z1", "b_z2", "y_z1", "y_z2"] + include_fragments : bool, optional + If include fragments in plot_df, by default True + fragment_intensity_df : pd.DataFrame, optional + Fragment intensity dataframe of the peptide for mirror plot, by default None + include_precursor_isotopes : bool, optional + If plot_df include precursor isotopes, by default False + max_isotope : int, optional + Maximal number of precursor isotopes, by default 6 + min_frag_mz : float, optional + Minimal m/z value of fragments, by default 100.0 + min_frag_intensity : float, optional + Minimal intensity of fragments to plot, by default 0.001 + + Returns + ------- + pd.DataFrame + Query dataframe for plotting with possible columns: + 'mz', 'type', 'loss_type', 'charge', 'number', 'fragment_site', + 'ion_name', 'sequence', 'mods', 'mod_sites', 'precursor_charge', + 'modified_sequence', 'rt_sec', 'precursor_mz', 'precursor_i_0', + 'precursor_i_1', 'precursor_i_2', 'precursor_i_3', 'precursor_i_4', + 'precursor_i_5', 'precursor_mono_idx' + """ + if ms_level == 1: + include_fragments = False + precursor_df, fragment_mz_df = make_precursor_fragment_df( sequence, mods, mod_sites, charge, - rt_sec=rt_sec, - mobility=mobility, - charged_frag_types=charged_frag_types, include_fragments=include_fragments, - fragment_intensity_df=fragment_intensity_df, + charged_frag_types=charged_frag_types, include_precursor_isotopes=include_precursor_isotopes, max_isotope=max_isotope, - min_frag_mz=min_frag_mz, ) + if fragment_intensity_df is not None: + columns = np.intersect1d( + fragment_mz_df.columns.values, + fragment_intensity_df.columns.values, + ) + fragment_mz_df = fragment_mz_df[columns] + fragment_intensity_df = fragment_intensity_df[columns] - return make_psm_plot_df( - spec_masses=spec_masses, - spec_intensities=spec_intensities, - query_masses=plot_df.mz.values, - query_ion_names=plot_df.ion_name.values, - 
query_mass_tols=plot_df.mz.values * ppm * 1e-6, - query_frag_idxes=plot_df.fragment_site.values, - modified_sequence=plot_df.modified_sequence.values[0], - mod_sites=mod_sites, - query_intensities=plot_df.intensity.values - if "intensity" in plot_df.columns - else None, - match_mode=match_mode, + return translate_frag_df_to_plot_df( + precursor_df, + fragment_mz_df, + fragment_intensity_df=fragment_intensity_df, + rt_sec=rt_sec, + mobility=mobility, + ms_level=ms_level, + min_frag_mz=min_frag_mz, + min_frag_intensity=min_frag_intensity, ) -def make_query_df_for_peptide( +def make_psm_plot_df_for_peptide( + spec_masses: np.ndarray, + spec_intensities: np.ndarray, sequence: str, mods: str, mod_sites: str, charge: int, rt_sec: float = 0.0, mobility: float = 0.0, - ms_level: int = 2, + ppm: float = 20.0, charged_frag_types: list = ["b_z1", "b_z2", "y_z1", "y_z2"], include_fragments: bool = True, fragment_intensity_df: pd.DataFrame = None, include_precursor_isotopes: bool = False, max_isotope: int = 6, min_frag_mz: float = 100.0, + min_frag_intensity: float = 0.001, + match_mode: typing.Literal["closest", "highest"] = "closest", ) -> pd.DataFrame: - if ms_level == 1: - include_fragments = False - precursor_df, fragment_mz_df = make_precursor_fragment_df( + """ + Create a plot dataframe for a MS2 spectrum and a peptide. + + Parameters + ---------- + spec_masses : np.ndarray + Peak m/z values of a spectrum. + spec_intensities : np.ndarray + Peak intensities of a spectrum. + sequence : str + Peptide sequence. + mods : str + Modification names in alphabase format. + mod_sites : str + Modification sites in alphabase format. + charge : int + Charge state of the peptide. 
+ rt_sec : float, optional + Retention time in seconds of the peptide, by default 0.0 + mobility : float, optional + Ion mobility of the peptide, by default 0.0 + ppm : float, optional + Matching mass tolerance in ppm, by default 20.0 + charged_frag_types : list, optional + Charge states of fragments, by default ["b_z1", "b_z2", "y_z1", "y_z2"] + include_fragments : bool, optional + If include fragments in plot_df, by default True + fragment_intensity_df : pd.DataFrame, optional + Fragment intensity dataframe of the peptide for mirror plot, by default None + include_precursor_isotopes : bool, optional + If plot_df include precursor isotopes, by default False + max_isotope : int, optional + Maximal number of precursor isotopes, by default 6 + min_frag_mz : float, optional + Minimal m/z value of fragments, by default 100.0 + min_frag_intensity : float, optional + Minimal intensity of fragments to plot, by default 0.001 + match_mode : "closest", "highest", optional + If extract the closest peak or highest peak within + the given matching tolerance, by default "closest" + + Returns + ------- + pd.DataFrame + Plot dataframe with possible columns: + "modified_sequence", "mz", "intensity", "fragment_site", + "ppm_err", "mass_err", "ion_name", "mod_sites", + "precursor_mz", "precursor_i_0", + "precursor_i_1", "precursor_i_2", "precursor_i_3", "precursor_i_4", + "precursor_i_5", "precursor_mono_idx", "color" + """ + plot_df = make_query_df_for_peptide( sequence, mods, mod_sites, charge, - include_fragments=include_fragments, + rt_sec=rt_sec, + mobility=mobility, charged_frag_types=charged_frag_types, + include_fragments=include_fragments, + fragment_intensity_df=fragment_intensity_df, include_precursor_isotopes=include_precursor_isotopes, max_isotope=max_isotope, + min_frag_mz=min_frag_mz, + min_frag_intensity=min_frag_intensity, ) - if fragment_intensity_df is not None: - columns = np.intersect1d( - fragment_mz_df.columns.values, - fragment_intensity_df.columns.values, - ) 
- fragment_mz_df = fragment_mz_df[columns] - fragment_intensity_df = fragment_intensity_df[columns] - return translate_frag_df_to_plot_df( - precursor_df, - fragment_mz_df, - fragment_intensity_df=fragment_intensity_df, - rt_sec=rt_sec, - mobility=mobility, - ms_level=ms_level, - min_frag_mz=min_frag_mz, + return make_psm_plot_df( + spec_masses=spec_masses, + spec_intensities=spec_intensities, + query_masses=plot_df.mz.values, + query_ion_names=plot_df.ion_name.values, + query_mass_tols=plot_df.mz.values * ppm * 1e-6, + query_frag_idxes=plot_df.fragment_site.values, + modified_sequence=plot_df.modified_sequence.values[0], + mod_sites=mod_sites, + query_intensities=plot_df.intensity.values + if "intensity" in plot_df.columns + else None, + match_mode=match_mode, ) -def make_psm_plot_for_frag_dfs( +def make_psm_plot_df_for_frag_dfs( spec_masses: np.ndarray, spec_intensities: np.ndarray, precursor_df: pd.DataFrame, @@ -119,7 +219,41 @@ def make_psm_plot_for_frag_dfs( min_frag_mz: float = 100.0, min_frag_intensity: float = 0.001, match_mode: typing.Literal["closest", "highest"] = "closest", -): +) -> pd.DataFrame: + """ + Similar to :func:`make_psm_plot_df_for_peptide`, but this function + does not start from a peptide, but from its fragment_mz_df. + + Parameters + ---------- + spec_masses : np.ndarray + Peak m/z values of a spectrum. + spec_intensities : np.ndarray + Peak intensities of a spectrum. + precursor_df : pd.DataFrame + This must be alphabase precursor_df with only one precursor. + fragment_mz_df : pd.DataFrame + The alphabase fragment_mz_df of the `precursor_df`. 
+ fragment_intensity_df : pd.DataFrame, optional + Fragment intensity dataframe of precursor for mirror plot, by default None + ppm : float, optional + Matching mass tolerance in ppm, by default 20.0 + min_frag_mz : float, optional + Minimal m/z value of fragments, by default 100.0 + min_frag_intensity : float, optional + Minimal intensity of fragments to plot, by default 0.001 + match_mode : "closest", "highest", optional + If extract the closest peak or highest peak within + the given matching tolerance, by default "closest" + + Returns + ------- + DataFrame + Plot dataframe with possible columns: + "modified_sequence", "mz", "intensity", "fragment_site", + "ppm_err", "mass_err", "ion_name", "mod_sites", + "precursor_mz", "color" + """ plot_df = translate_frag_df_to_plot_df( precursor_df, fragment_mz_df, @@ -128,6 +262,11 @@ def make_psm_plot_for_frag_dfs( min_frag_intensity=min_frag_intensity, ) + if "intensity" in plot_df.columns: + query_intensities = plot_df.intensity.values + else: + query_intensities = None + return make_psm_plot_df( spec_masses=spec_masses, spec_intensities=spec_intensities, @@ -137,9 +276,7 @@ def make_psm_plot_for_frag_dfs( query_frag_idxes=plot_df.fragment_site.values, modified_sequence=plot_df.modified_sequence.values[0], mod_sites=precursor_df.mod_sites.values[0], - query_intensities=plot_df.intensity.values - if "intensity" in plot_df.columns - else None, + query_intensities=query_intensities, match_mode=match_mode, ) @@ -152,6 +289,30 @@ def make_query_df( query_im: float = 0.0, query_intensities: np.ndarray = None, ) -> pd.DataFrame: + """ + Create a query dataframe based on query_masses and query_ion_names + to generate MS2 or XIC plots. + + Parameters + ---------- + query_masses : np.ndarray + Query or fragment m/z values. + query_ion_names : typing.List[str] + Query or fragment ion names. + query_rt_sec : float + RT in seconds. + precursor_mz : float + Precursor m/z value. 
+ query_im : float, optional + Ion mobility, by default 0.0 + query_intensities : np.ndarray, optional + Query intensities for mirror plot, by default None + + Returns + ------- + pd.DataFrame + "mz", "intensity", "fragment_site", "ion_name", "precursor_mz" + """ df = pd.DataFrame( dict( mz=query_masses, @@ -179,6 +340,43 @@ def make_psm_plot_df( query_intensities: np.ndarray = None, match_mode: typing.Literal["closest", "highest"] = "closest", ) -> pd.DataFrame: + """ + Create a plot dataframe for a MS2 spectrum and query m/z values. + + Parameters + ---------- + spec_masses : np.ndarray + Peak m/z values of a spectrum. + spec_intensities : np.ndarray + Peak intensities of a spectrum. + query_masses : np.ndarray + Query or fragment m/z values. + query_ion_names : typing.List[str] + Query or fragment ion names. + query_mass_tols : np.ndarray + Matching mass tolerance of `query_masses`. + query_frag_idxes : np.ndarray + Fragment indices or positions of `query_masses`. + modified_sequence : str, optional + Modified sequence, such as "AM[+16]DEFGK_(2+)", by default "" + mod_sites : str, optional + Modification sites in alphabase format, by default "" + query_intensities : np.ndarray, optional + Query intensities for mirror plot, by default None + match_mode : "closest", "highest", optional + If extract the closest peak or highest peak within + the given matching tolerance, by default "closest" + + Returns + ------- + pd.DataFrame + Plot dataframe with possible columns: + "modified_sequence", "mz", "intensity", "fragment_site", + "ppm_err", "mass_err", "ion_name", "mod_sites", + "precursor_mz", "precursor_i_0", + "precursor_i_1", "precursor_i_2", "precursor_i_3", "precursor_i_4", + "precursor_i_5", "precursor_mono_idx", "color" + """ query_ion_names = np.array(query_ion_names, dtype="U") if match_mode == "highest": query_matched_idxes = match_highest_peaks( @@ -261,7 +459,28 @@ def PCC_sim(x, y): return df -def get_modified_sequence(sequence: str, mods: str, mod_sites: 
str, charge: int = 0): +def get_modified_sequence( + sequence: str, mods: str, mod_sites: str, charge: int = 0 +) -> str: + """ + Parse sequence, mods, mod_sites into a single modified sequence string. + + Parameters + ---------- + sequence : str + Peptide sequence. + mods : str + Modifications in alphabase format. + mod_sites : str + Modification sites in alphabase format. + charge : int, optional + Precursor charge, by default 0 + + Returns + ------- + str + Modified sequence. + """ sequence = "_" + sequence + "_" mod_masses = np.zeros(len(sequence)) if mods: @@ -288,6 +507,38 @@ def translate_frag_df_to_plot_df( min_frag_mz: float = 100.0, min_frag_intensity: float = 0.001, ) -> pd.DataFrame: + """ + Translate `precursor_df`, `fragment_mz_df` (and `fragment_intensity_df`) + into a single plot df. + + Parameters + ---------- + precursor_df : pd.DataFrame + Precursor df with a single precursor. + fragment_mz_df : pd.DataFrame + fragment_mz_df for the precursor. + fragment_intensity_df : pd.DataFrame, optional + fragment_intensity_df for the precursor, by default None + rt_sec : float, optional + RT in seconds, by default 0.0 + mobility : float, optional + Ion mobility, by default 0.0 + ms_level : int, optional + MS level. If 2, precursor_mz will be included otherwise it is useless. 
+ By default 2 + min_frag_mz : float, optional + Minimal m/z value of fragments, by default 100.0 + min_frag_intensity : float, optional + Minimal intensity of fragments to plot, by default 0.001 + + Returns + ------- + pd.DataFrame + Query dataframe for plotting with possible columns: + 'mz', 'type', 'loss_type', 'charge', 'number', 'fragment_site', + 'ion_name', 'sequence', 'mods', 'mod_sites', 'precursor_charge', + 'modified_sequence', 'rt_sec', 'precursor_mz' + """ fragment_mz_df = fragment_mz_df.mask(fragment_mz_df < min_frag_mz, 0) if fragment_intensity_df is None: fragment_intensity_df = pd.DataFrame() @@ -379,7 +630,35 @@ def make_precursor_fragment_df( charged_frag_types: list = ["b_z1", "b_z2", "y_z1", "y_z2"], include_precursor_isotopes: bool = False, max_isotope: int = 6, -): +) -> typing.Tuple[pd.DataFrame, pd.DataFrame]: + """ + Create precursor_df and fragment_mz_df for a peptide. + + Parameters + ---------- + sequence : str + Peptide sequence + mods : str + Modifications in alphabase format + mod_sites : str + Modification sites in alphabase format + charge : int + Precursor charge state + include_fragments : bool, optional + If calculate fragments (fragment_mz_df), by default True + charged_frag_types : list, optional + Fragment charge states, by default ["b_z1", "b_z2", "y_z1", "y_z2"] + include_precursor_isotopes : bool, optional + If calculate precursor isotopes, by default False + max_isotope : int, optional + Maximal number of isotopes, by default 6 + + Returns + ------- + Tuple + pd.DataFrame: precursor dataframe. + pd.DataFrame: fragment dataframe. 
Empty dataframe if include_fragments==False + """ precursor_df = pd.DataFrame( dict(sequence=[sequence], mods=[mods], mod_sites=[mod_sites], charge=charge) ) diff --git a/nbs/test_viz.ipynb b/nbs/test_viz.ipynb index 123adbd..f9a680a 100644 --- a/nbs/test_viz.ipynb +++ b/nbs/test_viz.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -22,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -38,405 +38,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
raw_namesequencechargertrt_startrt_stopmobilityproteinsuniprot_idsgenesscan_numscorefdrdiann_spec_idxmodsmod_sitesnAArt_normprecursor_mzccs
020190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest...HLQLAIR254.542254.438554.74890H2AX_HUMANQ71UI9;Q9BTM1;P0C0S5;P16104;Q96KK5;Q96QV6;Q998...H2AX752690.9919635.351050e-047526870.671811425.7665120.0
120190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest...KLEDGPK227.622427.518827.76080EF1A1_HUMAN;EF1A3_HUMANP68104;Q5VTE0EEF1A1;EEF1A1P5378750.9937934.318230e-043787470.340233393.7214360.0
220190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest...MVQEAEK229.974729.871030.07840HSP7C_HUMANP0DMV9;P11142;P0DMV8HSPA8411410.9826279.529970e-044114070.369207417.7049290.0
320190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest...AAAEQLR233.777133.673433.88090B3GA3_HUMANO94766B3GAT3464180.9368622.747850e-034641770.416042379.7114020.0
420190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest...LPLQDVYK264.584964.481864.75750EF1A2_HUMANP68104;Q5VTE0;Q05639EEF1A2892420.9893736.590750e-048924180.795510488.2791180.0
...............................................................
8020190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest...AAAAAAAAAAAAAAAGAGAGAK366.859966.756466.99700S12A2_HUMANP55011SLC12A2924140.9999832.060000e-0592413220.823532532.9532650.0
8120190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest...AAAAVGAGHGAGGPGAASSSGGAR335.136535.032735.24020S35E1_HUMANQ96K37SLC35E1483080.9580221.908840e-0348307240.432786622.3051380.0
8220190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest...AAAASAAEAGIATTGTEDSDDALLK367.111767.008467.21520PSMD4_HUMANP55036PSMD4927680.9990711.766390e-0492767250.826633774.0432030.0
8320190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest...AAAAATVVPPMVGGPPFVGPVGFGPGDR378.598278.459878.70200RBM42_HUMANQ9BTD8RBM421087591.0000001.000000e-09108758280.968116864.4528140.0
8420190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest...AAAAAAAAAPAAAATAPTTAATTAATAAQ366.561666.458366.66500SRP14_HUMANP37108SRP14920010.9999842.060000e-0592000290.819857790.0749490.0
\n", - "

85 rows × 20 columns

\n", - "
" - ], - "text/plain": [ - " raw_name \\\n", - "0 20190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest... \n", - "1 20190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest... \n", - "2 20190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest... \n", - "3 20190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest... \n", - "4 20190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest... \n", - ".. ... \n", - "80 20190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest... \n", - "81 20190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest... \n", - "82 20190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest... \n", - "83 20190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest... \n", - "84 20190914_HFX4_SeHu_HeLa-400ng_DIA-GradientTest... \n", - "\n", - " sequence charge rt rt_start rt_stop \\\n", - "0 HLQLAIR 2 54.5422 54.4385 54.7489 \n", - "1 KLEDGPK 2 27.6224 27.5188 27.7608 \n", - "2 MVQEAEK 2 29.9747 29.8710 30.0784 \n", - "3 AAAEQLR 2 33.7771 33.6734 33.8809 \n", - "4 LPLQDVYK 2 64.5849 64.4818 64.7575 \n", - ".. ... ... ... ... ... \n", - "80 AAAAAAAAAAAAAAAGAGAGAK 3 66.8599 66.7564 66.9970 \n", - "81 AAAAVGAGHGAGGPGAASSSGGAR 3 35.1365 35.0327 35.2402 \n", - "82 AAAASAAEAGIATTGTEDSDDALLK 3 67.1117 67.0084 67.2152 \n", - "83 AAAAATVVPPMVGGPPFVGPVGFGPGDR 3 78.5982 78.4598 78.7020 \n", - "84 AAAAAAAAAPAAAATAPTTAATTAATAAQ 3 66.5616 66.4583 66.6650 \n", - "\n", - " mobility proteins \\\n", - "0 0 H2AX_HUMAN \n", - "1 0 EF1A1_HUMAN;EF1A3_HUMAN \n", - "2 0 HSP7C_HUMAN \n", - "3 0 B3GA3_HUMAN \n", - "4 0 EF1A2_HUMAN \n", - ".. ... ... \n", - "80 0 S12A2_HUMAN \n", - "81 0 S35E1_HUMAN \n", - "82 0 PSMD4_HUMAN \n", - "83 0 RBM42_HUMAN \n", - "84 0 SRP14_HUMAN \n", - "\n", - " uniprot_ids genes \\\n", - "0 Q71UI9;Q9BTM1;P0C0S5;P16104;Q96KK5;Q96QV6;Q998... H2AX \n", - "1 P68104;Q5VTE0 EEF1A1;EEF1A1P5 \n", - "2 P0DMV9;P11142;P0DMV8 HSPA8 \n", - "3 O94766 B3GAT3 \n", - "4 P68104;Q5VTE0;Q05639 EEF1A2 \n", - ".. ... ... 
\n", - "80 P55011 SLC12A2 \n", - "81 Q96K37 SLC35E1 \n", - "82 P55036 PSMD4 \n", - "83 Q9BTD8 RBM42 \n", - "84 P37108 SRP14 \n", - "\n", - " scan_num score fdr diann_spec_idx mods mod_sites nAA \\\n", - "0 75269 0.991963 5.351050e-04 75268 7 \n", - "1 37875 0.993793 4.318230e-04 37874 7 \n", - "2 41141 0.982627 9.529970e-04 41140 7 \n", - "3 46418 0.936862 2.747850e-03 46417 7 \n", - "4 89242 0.989373 6.590750e-04 89241 8 \n", - ".. ... ... ... ... ... ... ... \n", - "80 92414 0.999983 2.060000e-05 92413 22 \n", - "81 48308 0.958022 1.908840e-03 48307 24 \n", - "82 92768 0.999071 1.766390e-04 92767 25 \n", - "83 108759 1.000000 1.000000e-09 108758 28 \n", - "84 92001 0.999984 2.060000e-05 92000 29 \n", - "\n", - " rt_norm precursor_mz ccs \n", - "0 0.671811 425.766512 0.0 \n", - "1 0.340233 393.721436 0.0 \n", - "2 0.369207 417.704929 0.0 \n", - "3 0.416042 379.711402 0.0 \n", - "4 0.795510 488.279118 0.0 \n", - ".. ... ... ... \n", - "80 0.823532 532.953265 0.0 \n", - "81 0.432786 622.305138 0.0 \n", - "82 0.826633 774.043203 0.0 \n", - "83 0.968116 864.452814 0.0 \n", - "84 0.819857 790.074949 0.0 \n", - "\n", - "[85 rows x 20 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from alphabase.psm_reader.dia_psm_reader import DiannReader\n", "from io import StringIO\n", @@ -535,18 +139,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:WARNING: Temp mmap arrays are written to /var/folders/fh/hf8t3l1x02d42ggk3b304_rh0000gn/T/temp_mmap_m_g01bb6. Cleanup of this folder is OS dependant, and might need to be triggered manually! Current space: 594,288,431,104\n", - "WARNING:root:WARNING: No Bruker libraries are available for this operating system. Mobility and m/z values need to be estimated. 
While this estimation often returns acceptable results with errors < 0.02 Th, huge errors (e.g. offsets of 6 Th) have already been observed for some samples!\n" - ] - } - ], + "outputs": [], "source": [ "from alpharaw.viz.psm_plot import PSM_Plot\n", "from alpharaw.viz.xic_plot import XIC_Plot\n", @@ -683,20 +278,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3272.532" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "i_psm = 0\n", "psm_rt_sec = psm_df.rt.values[i_psm]*60\n", @@ -710,7 +294,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -725,28 +309,16 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([425.26486186, 425.76651186, 426.26816186, 426.76981186,\n", - " 427.27146186, 427.77311186, 428.27476186, 428.77641186])" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "psm_mz+np.arange(-1,7)*1.0033/psm_charge" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -765,7 +337,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -778,7 +350,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -796,117 +368,9 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
intensitymz
263257951.340309e+06101.071152
263257968.808889e+05103.053886
263257971.274483e+06104.053070
263257985.006362e+05106.500206
263257999.967048e+05109.731781
.........
263260669.956638e+06723.452393
263260678.864892e+06724.444397
263260682.661402e+06725.436951
263260692.712479e+06730.404236
263260709.344985e+05731.411865
\n", - "

276 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " intensity mz\n", - "26325795 1.340309e+06 101.071152\n", - "26325796 8.808889e+05 103.053886\n", - "26325797 1.274483e+06 104.053070\n", - "26325798 5.006362e+05 106.500206\n", - "26325799 9.967048e+05 109.731781\n", - "... ... ...\n", - "26326066 9.956638e+06 723.452393\n", - "26326067 8.864892e+06 724.444397\n", - "26326068 2.661402e+06 725.436951\n", - "26326069 2.712479e+06 730.404236\n", - "26326070 9.344985e+05 731.411865\n", - "\n", - "[276 rows x 2 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "one_peak_df = get_peak_df_list(\n", " spec_df, peak_df, psm_rt_sec, psm_mz, n=1\n", @@ -916,7 +380,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -927,153 +391,9 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
raw_namesequencechargertrt_startrt_stopmobilityproteinsuniprot_idsgenesscan_numscorefdrdiann_spec_idxmodsmod_sitesnAArt_normprecursor_mzccs
25PhosphoPooledSynth1xdY_DIA60K10Da30K500_1000Da...RPAGSVQNPVYHNQPLNPAPSR455.066254.875855.25640EGFR_HUMANP00533EGFR420610.9991810.00012242060Phospho@S21220.410663620.5552150.0
26PhosphoPooledSynth1xdY_DIA60K10Da30K500_1000Da...RPAGSVQNPVYHNQPLNPAPSR456.658056.464956.84830EGFR_HUMANP00533EGFR433110.9990370.00012243310Phospho@S5220.422534620.5552150.0
28PhosphoPooledSynth1xdY_DIA60K10Da30K500_1000Da...RPAGSVQNPVYHNQPLNPAPSR453.347353.156953.53750EGFR_HUMANP00533EGFR407110.9997030.00012240710Phospho@Y11220.397844620.5552150.0
\n", - "
" - ], - "text/plain": [ - " raw_name sequence \\\n", - "25 PhosphoPooledSynth1xdY_DIA60K10Da30K500_1000Da... RPAGSVQNPVYHNQPLNPAPSR \n", - "26 PhosphoPooledSynth1xdY_DIA60K10Da30K500_1000Da... RPAGSVQNPVYHNQPLNPAPSR \n", - "28 PhosphoPooledSynth1xdY_DIA60K10Da30K500_1000Da... RPAGSVQNPVYHNQPLNPAPSR \n", - "\n", - " charge rt rt_start rt_stop mobility proteins uniprot_ids \\\n", - "25 4 55.0662 54.8758 55.2564 0 EGFR_HUMAN P00533 \n", - "26 4 56.6580 56.4649 56.8483 0 EGFR_HUMAN P00533 \n", - "28 4 53.3473 53.1569 53.5375 0 EGFR_HUMAN P00533 \n", - "\n", - " genes scan_num score fdr diann_spec_idx mods mod_sites \\\n", - "25 EGFR 42061 0.999181 0.000122 42060 Phospho@S 21 \n", - "26 EGFR 43311 0.999037 0.000122 43310 Phospho@S 5 \n", - "28 EGFR 40711 0.999703 0.000122 40710 Phospho@Y 11 \n", - "\n", - " nAA rt_norm precursor_mz ccs \n", - "25 22 0.410663 620.555215 0.0 \n", - "26 22 0.422534 620.555215 0.0 \n", - "28 22 0.397844 620.555215 0.0 " - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from io import StringIO\n", "from alphabase.psm_reader.dia_psm_reader import DiannReader\n", @@ -1117,251 +437,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chargeinjection_timeisolation_lower_mzisolation_upper_mzms_levelncepeak_start_idxpeak_stop_idxprecursor_mzrtspec_idx
0014.627-1.000000-1.00000010.00844-1.0000000.0004360
1050.000500.502502510.502502230.0844980505.5025020.0016471
2055.500510.507507520.507507230.09801114515.5075070.0039492
3055.500520.512512530.512512230.011141285525.5125120.0052363
4055.500530.517517540.517517230.012851505535.5175170.0065184
....................................
126169055.500950.727478960.727478230.07204401272044045955.727478159.997733126169
126170055.500960.732483970.732483230.07204404572044098965.732483159.999014126170
126171055.500970.737488980.737488230.07204409872044129975.737488160.000297126171
126172055.500980.742493990.742493230.07204412972044160985.742493160.001581126172
126173055.500990.7474981000.747498230.07204416072044182995.747498160.002864126173
\n", - "

126174 rows × 11 columns

\n", - "
" - ], - "text/plain": [ - " charge injection_time isolation_lower_mz isolation_upper_mz \\\n", - "0 0 14.627 -1.000000 -1.000000 \n", - "1 0 50.000 500.502502 510.502502 \n", - "2 0 55.500 510.507507 520.507507 \n", - "3 0 55.500 520.512512 530.512512 \n", - "4 0 55.500 530.517517 540.517517 \n", - "... ... ... ... ... \n", - "126169 0 55.500 950.727478 960.727478 \n", - "126170 0 55.500 960.732483 970.732483 \n", - "126171 0 55.500 970.737488 980.737488 \n", - "126172 0 55.500 980.742493 990.742493 \n", - "126173 0 55.500 990.747498 1000.747498 \n", - "\n", - " ms_level nce peak_start_idx peak_stop_idx precursor_mz \\\n", - "0 1 0.0 0 844 -1.000000 \n", - "1 2 30.0 844 980 505.502502 \n", - "2 2 30.0 980 1114 515.507507 \n", - "3 2 30.0 1114 1285 525.512512 \n", - "4 2 30.0 1285 1505 535.517517 \n", - "... ... ... ... ... ... \n", - "126169 2 30.0 72044012 72044045 955.727478 \n", - "126170 2 30.0 72044045 72044098 965.732483 \n", - "126171 2 30.0 72044098 72044129 975.737488 \n", - "126172 2 30.0 72044129 72044160 985.742493 \n", - "126173 2 30.0 72044160 72044182 995.747498 \n", - "\n", - " rt spec_idx \n", - "0 0.000436 0 \n", - "1 0.001647 1 \n", - "2 0.003949 2 \n", - "3 0.005236 3 \n", - "4 0.006518 4 \n", - "... ... ... 
\n", - "126169 159.997733 126169 \n", - "126170 159.999014 126170 \n", - "126171 160.000297 126171 \n", - "126172 160.001581 126172 \n", - "126173 160.002864 126173 \n", - "\n", - "[126174 rows x 11 columns]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "raw_file = \"/Users/wenfengzeng/data/orbi_dia/synphos/PhosphoPooledSynth1xdY_DIA60K10Da30K500_1000Da_160min50cm_19031101.raw.hdf\"\n", "\n", @@ -1374,20 +452,9 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3303.972" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "i_phos = 0\n", "phos_rt_sec = phos_df.rt.values[i_phos]*60\n", @@ -1402,30 +469,16 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "25 21\n", - "26 5\n", - "28 11\n", - "Name: mod_sites, dtype: object" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "phos_df.mod_sites" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1434,7 +487,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1451,7 +504,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1465,7 +518,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1487,619 +540,40 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
modified_sequencemzintensityfragment_siteppm_errmass_errion_namemod_sitescolor
0_RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+)110.0712431.006920e+07-10.0000000.000000-21lightgrey
1_RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+)111.0744634.443796e+05-10.0000000.000000-21lightgrey
2_RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+)112.0871126.159941e+05-10.0000000.000000-21lightgrey
3_RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+)113.0708691.202546e+05-10.0000000.000000-21lightgrey
4_RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+)114.1021881.011716e+05-10.0000000.000000-21lightgrey
..............................
735_RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+)304.1336065.126084e+0516-2.107196-0.000641y5++21lightblue
736_RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+)510.2071847.633365e+051713.5179840.006897y4+21lightblue
737_RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+)439.1700748.596875e+0518-4.099863-0.001801y3+21lightblue
738_RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+)220.0886691.208201e+0518-0.207991-0.000046y3++21lightblue
739_RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+)175.1189582.989494e+0620-0.348535-0.000061y1+21lightblue
\n", - "

740 rows × 9 columns

\n", - "
" - ], - "text/plain": [ - " modified_sequence mz intensity \\\n", - "0 _RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+) 110.071243 1.006920e+07 \n", - "1 _RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+) 111.074463 4.443796e+05 \n", - "2 _RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+) 112.087112 6.159941e+05 \n", - "3 _RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+) 113.070869 1.202546e+05 \n", - "4 _RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+) 114.102188 1.011716e+05 \n", - ".. ... ... ... \n", - "735 _RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+) 304.133606 5.126084e+05 \n", - "736 _RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+) 510.207184 7.633365e+05 \n", - "737 _RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+) 439.170074 8.596875e+05 \n", - "738 _RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+) 220.088669 1.208201e+05 \n", - "739 _RPAGSVQNPVYHNQPLNPAPS[+80]R_(4+) 175.118958 2.989494e+06 \n", - "\n", - " fragment_site ppm_err mass_err ion_name mod_sites color \n", - "0 -1 0.000000 0.000000 - 21 lightgrey \n", - "1 -1 0.000000 0.000000 - 21 lightgrey \n", - "2 -1 0.000000 0.000000 - 21 lightgrey \n", - "3 -1 0.000000 0.000000 - 21 lightgrey \n", - "4 -1 0.000000 0.000000 - 21 lightgrey \n", - ".. ... ... ... ... ... ... 
\n", - "735 16 -2.107196 -0.000641 y5++ 21 lightblue \n", - "736 17 13.517984 0.006897 y4+ 21 lightblue \n", - "737 18 -4.099863 -0.001801 y3+ 21 lightblue \n", - "738 18 -0.207991 -0.000046 y3++ 21 lightblue \n", - "739 20 -0.348535 -0.000061 y1+ 21 lightblue \n", - "\n", - "[740 rows x 9 columns]" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "plot_df" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "xic_plotter.rt_sec_win = 360\n", - "plot_df = make_query_plot_df_for_peptide(\n", + "plot_df = make_query_df_for_peptide(\n", " phos_seq, phos_mods, phos_sites, \n", " phos_charge, xic_rt_sec,\n", " ms_level=2, include_precursor_isotopes=True\n", ")\n", - "plot_df[\"color\"] = \"lightblue\"\n", - "plot_df.loc[plot_df.fragment_site.isin(site_specific_idxes),\"color\"] = \"red\"\n", - "fig = xic_plotter.plot(\n", - " phos_spec_df, phos_peak_df, plot_df.sort_values(\"color\"), \n", - " title=plot_df.modified_sequence.values[0]\n", - ")" + "plot_df" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
mztypeloss_typechargenumberfragment_siteion_namesequencemodsmod_sites...rt_secprecursor_mzprecursor_i_0precursor_i_1precursor_i_2precursor_i_3precursor_i_4precursor_i_5precursor_mono_idxcolor
0157.108383980.01.01.00b1+RPAGSVQNPVYHNQPLNPAPSRPhospho@S21...3303.972620.5552150.2617680.3360850.2318480.1130070.0433880.0139050lightblue
12323.0979001210.01.021.00y21+RPAGSVQNPVYHNQPLNPAPSRPhospho@S21...3303.972620.5552150.2617680.3360850.2318480.1130070.0433880.0139050lightblue
21162.0526121210.02.021.00y21++RPAGSVQNPVYHNQPLNPAPSRPhospho@S21...3303.972620.5552150.2617680.3360850.2318480.1130070.0433880.0139050lightblue
3254.161148980.01.02.01b2+RPAGSVQNPVYHNQPLNPAPSRPhospho@S21...3303.972620.5552150.2617680.3360850.2318480.1130070.0433880.0139050lightblue
4127.584213980.02.02.01b2++RPAGSVQNPVYHNQPLNPAPSRPhospho@S21...3303.972620.5552150.2617680.3360850.2318480.1130070.0433880.0139050lightblue
..................................................................
83620.806030770.00.00.0-1M1RPAGSVQNPVYHNQPLNPAPSRPhospho@S21...3303.972620.5552150.2617680.3360850.2318480.1130070.0433880.0139050lightblue
84621.056885770.00.00.0-1M2RPAGSVQNPVYHNQPLNPAPSRPhospho@S21...3303.972620.5552150.2617680.3360850.2318480.1130070.0433880.0139050lightblue
85621.307678770.00.00.0-1M3RPAGSVQNPVYHNQPLNPAPSRPhospho@S21...3303.972620.5552150.2617680.3360850.2318480.1130070.0433880.0139050lightblue
86621.558533770.00.00.0-1M4RPAGSVQNPVYHNQPLNPAPSRPhospho@S21...3303.972620.5552150.2617680.3360850.2318480.1130070.0433880.0139050lightblue
87621.809326770.00.00.0-1M5RPAGSVQNPVYHNQPLNPAPSRPhospho@S21...3303.972620.5552150.2617680.3360850.2318480.1130070.0433880.0139050lightblue
\n", - "

88 rows × 22 columns

\n", - "
" - ], - "text/plain": [ - " mz type loss_type charge number fragment_site ion_name \\\n", - "0 157.108383 98 0.0 1.0 1.0 0 b1+ \n", - "1 2323.097900 121 0.0 1.0 21.0 0 y21+ \n", - "2 1162.052612 121 0.0 2.0 21.0 0 y21++ \n", - "3 254.161148 98 0.0 1.0 2.0 1 b2+ \n", - "4 127.584213 98 0.0 2.0 2.0 1 b2++ \n", - ".. ... ... ... ... ... ... ... \n", - "83 620.806030 77 0.0 0.0 0.0 -1 M1 \n", - "84 621.056885 77 0.0 0.0 0.0 -1 M2 \n", - "85 621.307678 77 0.0 0.0 0.0 -1 M3 \n", - "86 621.558533 77 0.0 0.0 0.0 -1 M4 \n", - "87 621.809326 77 0.0 0.0 0.0 -1 M5 \n", - "\n", - " sequence mods mod_sites ... rt_sec precursor_mz \\\n", - "0 RPAGSVQNPVYHNQPLNPAPSR Phospho@S 21 ... 3303.972 620.555215 \n", - "1 RPAGSVQNPVYHNQPLNPAPSR Phospho@S 21 ... 3303.972 620.555215 \n", - "2 RPAGSVQNPVYHNQPLNPAPSR Phospho@S 21 ... 3303.972 620.555215 \n", - "3 RPAGSVQNPVYHNQPLNPAPSR Phospho@S 21 ... 3303.972 620.555215 \n", - "4 RPAGSVQNPVYHNQPLNPAPSR Phospho@S 21 ... 3303.972 620.555215 \n", - ".. ... ... ... ... ... ... \n", - "83 RPAGSVQNPVYHNQPLNPAPSR Phospho@S 21 ... 3303.972 620.555215 \n", - "84 RPAGSVQNPVYHNQPLNPAPSR Phospho@S 21 ... 3303.972 620.555215 \n", - "85 RPAGSVQNPVYHNQPLNPAPSR Phospho@S 21 ... 3303.972 620.555215 \n", - "86 RPAGSVQNPVYHNQPLNPAPSR Phospho@S 21 ... 3303.972 620.555215 \n", - "87 RPAGSVQNPVYHNQPLNPAPSR Phospho@S 21 ... 3303.972 620.555215 \n", - "\n", - " precursor_i_0 precursor_i_1 precursor_i_2 precursor_i_3 precursor_i_4 \\\n", - "0 0.261768 0.336085 0.231848 0.113007 0.043388 \n", - "1 0.261768 0.336085 0.231848 0.113007 0.043388 \n", - "2 0.261768 0.336085 0.231848 0.113007 0.043388 \n", - "3 0.261768 0.336085 0.231848 0.113007 0.043388 \n", - "4 0.261768 0.336085 0.231848 0.113007 0.043388 \n", - ".. ... ... ... ... ... 
\n", - "83 0.261768 0.336085 0.231848 0.113007 0.043388 \n", - "84 0.261768 0.336085 0.231848 0.113007 0.043388 \n", - "85 0.261768 0.336085 0.231848 0.113007 0.043388 \n", - "86 0.261768 0.336085 0.231848 0.113007 0.043388 \n", - "87 0.261768 0.336085 0.231848 0.113007 0.043388 \n", - "\n", - " precursor_i_5 precursor_mono_idx color \n", - "0 0.013905 0 lightblue \n", - "1 0.013905 0 lightblue \n", - "2 0.013905 0 lightblue \n", - "3 0.013905 0 lightblue \n", - "4 0.013905 0 lightblue \n", - ".. ... ... ... \n", - "83 0.013905 0 lightblue \n", - "84 0.013905 0 lightblue \n", - "85 0.013905 0 lightblue \n", - "86 0.013905 0 lightblue \n", - "87 0.013905 0 lightblue \n", - "\n", - "[88 rows x 22 columns]" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ + "plot_df[\"color\"] = \"lightblue\"\n", + "plot_df.loc[plot_df.fragment_site.isin(site_specific_idxes),\"color\"] = \"red\"\n", + "fig = xic_plotter.plot(\n", + " phos_spec_df, phos_peak_df, plot_df.sort_values(\"color\"), \n", + " title=plot_df.modified_sequence.values[0]\n", + ")\n", "plot_df" ] },