diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index f952ba8d..bc9b7174 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.4.2
+current_version = 0.4.3
 commit = True
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
diff --git a/HISTORY.md b/HISTORY.md
index bfb51bf7..02a31368 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,5 +1,9 @@
 # Changelog
 
+# 0.4.3
+* FIX loading dataset with columns
+* ADD log2-transformation
+
 # 0.4.2
 * ADD option compare_preprocessing_modes
 * update to streamlit 1.19 with new caching functions
diff --git a/alphastats/DataSet.py b/alphastats/DataSet.py
index b0ff98f2..17689f54 100644
--- a/alphastats/DataSet.py
+++ b/alphastats/DataSet.py
@@ -115,7 +115,7 @@ def create_matrix(self):
         """
         regex_find_intensity_columns = self.intensity_column.replace("[sample]", ".*")
-
+
         df = self.rawinput
         df = df.set_index(self.index_column)
         df = df.filter(regex=(regex_find_intensity_columns), axis=1)
@@ -159,6 +159,7 @@ def load_metadata(self, file_path):
 
         # check whether sample labeling matches protein data
         # warnings.warn("WARNING: Sample names do not match sample labelling in protein data")
+        df.columns = df.columns.astype(str)
         self.metadata = df
 
     def _save_dataset_info(self):
@@ -168,6 +169,7 @@ def _save_dataset_info(self):
             "Matrix: Number of ProteinIDs/ProteinGroups": self.mat.shape[1],
             "Matrix: Number of samples": self.mat.shape[0],
             "Intensity used for analysis": self.intensity_column,
+            "Log2-transformed": False,
             "Normalization": None,
             "Imputation": None,
             "Contaminations have been removed": False,
diff --git a/alphastats/DataSet_Preprocess.py b/alphastats/DataSet_Preprocess.py
index 281559cd..c0a6fe4b 100644
--- a/alphastats/DataSet_Preprocess.py
+++ b/alphastats/DataSet_Preprocess.py
@@ -129,7 +129,7 @@ def _imputation(self, method):
     @ignore_warning(UserWarning)
     @ignore_warning(RuntimeWarning)
     def _normalization(self, method):
-
+
         if method == "zscore":
             scaler = sklearn.preprocessing.StandardScaler()
             normalized_array = scaler.fit_transform(self.mat.values)
@@ -153,11 +153,10 @@ def _normalization(self, method):
                 "Choose from 'zscore', 'quantile', 'linear' normalization. or 'vst' for variance stabilization transformation"
             )
 
-        # TODO logarithimic normalization
-
         self.mat = pd.DataFrame(
             normalized_array, index=self.mat.index, columns=self.mat.columns
         )
+
         self.preprocessing_info.update({"Normalization": method})
 
     def reset_preprocessing(self):
@@ -193,11 +192,16 @@ def _compare_preprocessing_modes(self, func, params_for_func):
             results_list.append(res)
 
         return results_list
-
+
+    def _log2_transform(self):
+        self.mat = np.log2(self.mat + 0.1)
+        self.preprocessing_info.update({"Log2 Transformed": True})
+
     @ignore_warning(RuntimeWarning)
     def preprocess(
         self,
+        log2_transform=True,
         remove_contaminations=False,
         subset=False,
         normalization=None,
@@ -239,6 +243,7 @@ def preprocess(
 
         Args:
             remove_contaminations (bool, optional): remove ProteinGroups that are identified as contamination.
+            log2_transform (bool, optional): Log2 transform data. Default to True.
             normalization (str, optional): method to normalize data: either "zscore", "quantile", "linear". Defaults to None.
             remove_samples (list, optional): list with sample ids to remove. Defaults to None.
             imputation (str, optional): method to impute data: either "mean", "median", "knn" or "randomforest". Defaults to None.
@@ -249,6 +254,9 @@ def preprocess(
 
         if subset:
             self.mat = self._subset()
+
+        if log2_transform:
+            self._log2_transform()
 
         if normalization is not None:
             self._normalization(method=normalization)
diff --git a/alphastats/__init__.py b/alphastats/__init__.py
index 455eb420..4a486e41 100644
--- a/alphastats/__init__.py
+++ b/alphastats/__init__.py
@@ -1,5 +1,5 @@
 __project__ = "alphastats"
-__version__ = "0.4.2"
+__version__ = "0.4.3"
 __license__ = "Apache"
 __description__ = "An open-source Python package for Mass Spectrometry Analysis"
 __author__ = "Mann Labs"
diff --git a/alphastats/gui/pages/03_Preprocessing.py b/alphastats/gui/pages/03_Preprocessing.py
index 24d3a8ce..bf4fc045 100644
--- a/alphastats/gui/pages/03_Preprocessing.py
+++ b/alphastats/gui/pages/03_Preprocessing.py
@@ -29,6 +29,11 @@ def preprocessing():
             options=[True, False],
         )
 
+        log2_transform = st.selectbox(
+            "Log2-transform dataset",
+            options=[True, False],
+        )
+
         normalization = st.selectbox(
             "Normalization", options=[None, "zscore", "quantile", "vst", "linear"]
         )
@@ -42,6 +47,7 @@ def preprocessing():
         if submitted:
             st.session_state.dataset.preprocess(
                 remove_contaminations=remove_contaminations,
+                log2_transform=log2_transform,
                 subset=subset,
                 normalization=normalization,
                 imputation=imputation,
diff --git a/alphastats/loader/AlphaPeptLoader.py b/alphastats/loader/AlphaPeptLoader.py
index 846a8620..a3112729 100644
--- a/alphastats/loader/AlphaPeptLoader.py
+++ b/alphastats/loader/AlphaPeptLoader.py
@@ -41,6 +41,7 @@ def __init__(
         # add contamination column "Reverse"
         self._add_contamination_reverse_column()
         self._add_contamination_column()
+        self._read_all_columns_as_string()
         #  make ProteinGroup column
         self.rawinput["ProteinGroup"] = self.rawinput[self.index_column].map(
             self._standardize_protein_group_column
diff --git a/alphastats/loader/BaseLoader.py b/alphastats/loader/BaseLoader.py
index 2cabef6e..836745f6 100644
--- a/alphastats/loader/BaseLoader.py
+++ b/alphastats/loader/BaseLoader.py
@@ -32,6 +32,7 @@ def __init__(self, file, intensity_column, index_column, sep):
         self.ptm_df = None
         self._add_contamination_column()
         self._check_if_columns_are_present()
+        self._read_all_columns_as_string()
 
     def _check_if_columns_are_present(self):
         """check if given columns present in rawinput"""
@@ -46,9 +47,11 @@ def _check_if_columns_are_present(self):
                 "FragPipe Format: https://fragpipe.nesvilab.org/docs/tutorial_fragpipe_outputs.html#combined_proteintsv"
                 "MaxQuant Format: http://www.coxdocs.org/doku.php?id=maxquant:table:proteingrouptable"
             )
+
+    def _read_all_columns_as_string(self):
+        self.rawinput.columns = self.rawinput.columns.astype(str)
 
     def _check_if_indexcolumn_is_unique(self):
-        # TODO make own duplicates functions to have less dependencies
         duplicated_values = list(duplicates(self.rawinput[self.index_column].to_list()))
         if len(duplicated_values) > 0:
             # error or warning, duplicates could be resolved with preprocessing/filtering
diff --git a/alphastats/loader/DIANNLoader.py b/alphastats/loader/DIANNLoader.py
index f93132f3..3c78e31d 100644
--- a/alphastats/loader/DIANNLoader.py
+++ b/alphastats/loader/DIANNLoader.py
@@ -42,6 +42,7 @@ def __init__(
         self._remove_filepath_from_name()
         self._add_tag_to_sample_columns()
         self._add_contamination_column()
+        self._read_all_columns_as_string()
 
     def _add_tag_to_sample_columns(self):
         """
diff --git a/alphastats/loader/MaxQuantLoader.py b/alphastats/loader/MaxQuantLoader.py
index b9bde4a9..1902391b 100644
--- a/alphastats/loader/MaxQuantLoader.py
+++ b/alphastats/loader/MaxQuantLoader.py
@@ -34,6 +34,7 @@ def __init__(
         self.confidence_column = confidence_column
         self.software = "MaxQuant"
         self._set_filter_columns_to_true_false()
+        self._read_all_columns_as_string()
 
         if gene_names_column in self.rawinput.columns.to_list():
             self.gene_names = gene_names_column
diff --git a/alphastats/loader/SpectronautLoader.py b/alphastats/loader/SpectronautLoader.py
index b5f70dbb..828575aa 100644
--- a/alphastats/loader/SpectronautLoader.py
+++ b/alphastats/loader/SpectronautLoader.py
@@ -48,6 +48,7 @@ def __init__(
 
         self._reshape_spectronaut(sample_column=sample_column, gene_names_column=gene_names_column)
         self._add_contamination_column()
+        self._read_all_columns_as_string()
 
     def _reshape_spectronaut(self, sample_column, gene_names_column):
diff --git a/docs/conf.py b/docs/conf.py
index 075c7f2c..9f900de1 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -23,7 +23,7 @@
 author = "Elena Krismer"
 
 # The full version, including alpha/beta/rc tags
-release = "0.4.2"
+release = "0.4.3"
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/release/one_click_linux_gui/control b/release/one_click_linux_gui/control
index 29255a73..36d7f619 100644
--- a/release/one_click_linux_gui/control
+++ b/release/one_click_linux_gui/control
@@ -1,5 +1,5 @@
 Package: alphastats
-Version: 0.4.2
+Version: 0.4.3
 Architecture: all
 Maintainer: MannLabs
 Description: alphastats
diff --git a/release/one_click_linux_gui/create_installer_linux.sh b/release/one_click_linux_gui/create_installer_linux.sh
index eb5bd0bb..b4f5a50e 100644
--- a/release/one_click_linux_gui/create_installer_linux.sh
+++ b/release/one_click_linux_gui/create_installer_linux.sh
@@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel
 # Setting up the local package
 cd release/one_click_linux_gui
 # Make sure you include the required extra packages and always use the stable or very-stable options!
-pip install "../../dist/alphastats-0.4.2-py3-none-any.whl" +pip install "../../dist/alphastats-0.4.3-py3-none-any.whl" # Creating the stand-alone pyinstaller folder pip install pyinstaller==5.8 diff --git a/release/one_click_macos_gui/Info.plist b/release/one_click_macos_gui/Info.plist index d0c0ae7a..5c964440 100644 --- a/release/one_click_macos_gui/Info.plist +++ b/release/one_click_macos_gui/Info.plist @@ -9,9 +9,9 @@ CFBundleIconFile alphapeptstats_logo.icns CFBundleIdentifier - alphastats.0.4.2 + alphastats.0.4.3 CFBundleShortVersionString - 0.4.2 + 0.4.3 CFBundleInfoDictionaryVersion 6.0 CFBundleName diff --git a/release/one_click_macos_gui/create_installer_macos.sh b/release/one_click_macos_gui/create_installer_macos.sh index fb0f2674..a94f4933 100755 --- a/release/one_click_macos_gui/create_installer_macos.sh +++ b/release/one_click_macos_gui/create_installer_macos.sh @@ -20,7 +20,7 @@ python setup.py sdist bdist_wheel # Setting up the local package cd release/one_click_macos_gui -pip install "../../dist/alphastats-0.4.2-py3-none-any.whl" +pip install "../../dist/alphastats-0.4.3-py3-none-any.whl" # Creating the stand-alone pyinstaller folder pip install pyinstaller==5.8 diff --git a/release/one_click_macos_gui/distribution.xml b/release/one_click_macos_gui/distribution.xml index b76dc8c9..bf094197 100644 --- a/release/one_click_macos_gui/distribution.xml +++ b/release/one_click_macos_gui/distribution.xml @@ -1,6 +1,6 @@ - AlphaPeptStats 0.4.0 + AlphaPeptStats 0.4.3 diff --git a/release/one_click_windows_gui/alphastats_innoinstaller.iss b/release/one_click_windows_gui/alphastats_innoinstaller.iss index 7a05b896..122172a7 100644 --- a/release/one_click_windows_gui/alphastats_innoinstaller.iss +++ b/release/one_click_windows_gui/alphastats_innoinstaller.iss @@ -2,7 +2,7 @@ ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES! #define MyAppName "AlphaPeptStats" -#define MyAppVersion "0.4.2" +#define MyAppVersion "0.4.3" #define MyAppPublisher "MannLabs" #define MyAppURL "https://github.com/MannLabs/alphapeptstats" #define MyAppExeName "alphastats_gui.exe" diff --git a/release/one_click_windows_gui/create_installer_windows.sh b/release/one_click_windows_gui/create_installer_windows.sh index 1ffc4a55..7abfbeff 100644 --- a/release/one_click_windows_gui/create_installer_windows.sh +++ b/release/one_click_windows_gui/create_installer_windows.sh @@ -17,7 +17,7 @@ python setup.py sdist bdist_wheel # Setting up the local package cd release/one_click_windows_gui # Make sure you include the required extra packages and always use the stable or very-stable options! 
-pip install "../../dist/alphastats-0.4.2-py3-none-any.whl" +pip install "../../dist/alphastats-0.4.3-py3-none-any.whl" # Creating the stand-alone pyinstaller folder pip install pyinstaller==5.8 diff --git a/setup.py b/setup.py index 74a2cdf3..513c4936 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ def create_pip_wheel(): requirements = get_requirements() setuptools.setup( name="alphastats", - version="0.4.2", + version="0.4.3", license="Apache", description="An open-source Python package for Mass Spectrometry Analysis", long_description=get_long_description(), diff --git a/tests/test_DataSet.py b/tests/test_DataSet.py index 9cbbf931..d0319214 100644 --- a/tests/test_DataSet.py +++ b/tests/test_DataSet.py @@ -257,7 +257,7 @@ def test_preprocess_remove_samples(self): def test_preprocess_normalize_zscore(self): self.obj.mat = pd.DataFrame({"a": [2, 5, 4], "b": [5, 4, 4], "c": [0, 10, 8]}) # zscore Normalization - self.obj.preprocess(normalization="zscore") + self.obj.preprocess(log2_transform=False,normalization="zscore") expected_mat = pd.DataFrame( { "a": [-1.33630621, 1.06904497, 0.26726124], @@ -270,7 +270,7 @@ def test_preprocess_normalize_zscore(self): def test_preprocess_normalize_quantile(self): self.obj.mat = pd.DataFrame({"a": [2, 5, 4], "b": [5, 4, 4], "c": [0, 10, 8]}) # Quantile Normalization - self.obj.preprocess(normalization="quantile") + self.obj.preprocess(log2_transform=False,normalization="quantile") expected_mat = pd.DataFrame( {"a": [0.0, 1.0, 0.5], "b": [1.0, 0.0, 0.0], "c": [0.0, 1.0, 0.5]} ) @@ -279,7 +279,7 @@ def test_preprocess_normalize_quantile(self): def test_preprocess_normalize_linear(self): self.obj.mat = pd.DataFrame({"a": [2, 5, 4], "b": [5, 4, 4], "c": [0, 10, 8]}) # Linear Normalization - self.obj.preprocess(normalization="linear") + self.obj.preprocess(log2_transform=False,normalization="linear") expected_mat = pd.DataFrame( { "a": [0.37139068, 0.42107596, 0.40824829], @@ -292,7 +292,7 @@ def test_preprocess_normalize_linear(self): def test_preprocess_normalize_vst(self): self.obj.mat = pd.DataFrame({"a": [2, 5, 4], "b": [5, 4, 4], "c": [0, 10, 8]}) # Linear Normalization - self.obj.preprocess(normalization="vst") + self.obj.preprocess(log2_transform=False,normalization="vst") expected_mat = pd.DataFrame( { "a": [-1.30773413, 1.12010046, 0.18763367], @@ -306,7 +306,7 @@ def test_preprocess_imputation_mean_values(self): self.obj.mat = pd.DataFrame( {"a": [2, np.nan, 4], "b": [5, 4, 4], "c": [np.nan, 10, np.nan]} ) - self.obj.preprocess(imputation="mean") + self.obj.preprocess(log2_transform=False,imputation="mean") expected_mat = pd.DataFrame( {"a": [2.0, 3.0, 4.0], "b": [5.0, 4.0, 4.0], "c": [10.0, 10.0, 10.0]} ) @@ -316,7 +316,7 @@ def test_preprocess_imputation_median_values(self): self.obj.mat = pd.DataFrame( {"a": [2, np.nan, 4], "b": [5, 4, 4], "c": [np.nan, 10, np.nan]} ) - self.obj.preprocess(imputation="median") + self.obj.preprocess(log2_transform=False,imputation="median") expected_mat = pd.DataFrame( {"a": [2.0, 3.0, 4.0], "b": [5.0, 4.0, 4.0], "c": [10.0, 10.0, 10.0]} ) @@ -326,7 +326,7 @@ def test_preprocess_imputation_knn_values(self): self.obj.mat = pd.DataFrame( {"a": [2, np.nan, 4], "b": [5, 4, 4], "c": [np.nan, 10, np.nan]} ) - self.obj.preprocess(imputation="knn") + self.obj.preprocess(log2_transform=False,imputation="knn") expected_mat = pd.DataFrame( {"a": [2.0, 3.0, 4.0], "b": [5.0, 4.0, 4.0], "c": [10.0, 10.0, 10.0]} ) @@ -336,7 +336,7 @@ def test_preprocess_imputation_randomforest_values(self): self.obj.mat = 
pd.DataFrame( {"a": [2, np.nan, 4], "b": [5, 4, 4], "c": [np.nan, 10, np.nan]} ) - self.obj.preprocess(imputation="randomforest") + self.obj.preprocess(log2_transform=False,imputation="randomforest") expected_mat = pd.DataFrame( { "a": [2.00000000e00, -9.22337204e12, 4.00000000e00], @@ -372,14 +372,14 @@ def test_plot_correlation_matrix(self): ) def test_plot_clustermap(self): - self.obj.preprocess(imputation="knn") + self.obj.preprocess(log2_transform=False, imputation="knn") plot = self.obj.plot_clustermap() first_row = plot.data2d.iloc[0].to_list() expected = [487618.5371077078, 1293013.103298046] self.assertEqual(first_row, expected) def test_plot_clustermap_with_label_bar(self): - self.obj.preprocess(imputation="knn") + self.obj.preprocess(log2_transform=False, imputation="knn") plot = self.obj.plot_clustermap(label_bar=self.comparison_column) first_row = plot.data2d.iloc[0].to_list() expected = [487618.5371077078, 1293013.103298046] @@ -456,17 +456,10 @@ def test_plot_volcano_compare_preprocessing_modes(self): group2=["1_71_F10", "1_73_F12"], compare_preprocessing_modes=True ) - self.assertEqual(len(result_list), 9) - # check if results are different - # for idx, res in enumerate(result_list): - # for idx2, res2 in enumerate(result_list): - # if idx != idx2: - # difference = dictdiffer.diff(res.to_plotly_json(), res2.to_plotly_json()) - # self.assertNotEqual(len(list(difference)), 0) - + self.assertEqual(len(result_list), 9) def test_preprocess_subset(self): - self.obj.preprocess(subset=True) + self.obj.preprocess(subset=True, log2_transform=False) self.assertEqual(self.obj.mat.shape, (48, 1364)) @patch.object(Statistics, "tukey_test") @@ -552,7 +545,7 @@ def test_plot_volcano_wald(self): self.assertTrue(column_added) def test_plot_volcano_sam(self): - self.obj.preprocess(imputation="knn", normalization="zscore") + self.obj.preprocess(log2_transform=False, imputation="knn", normalization="zscore") plot = self.obj.plot_volcano( column = "disease", group1="type 2 diabetes mellitus", @@ -758,7 +751,7 @@ def test_volcano_plot_anova(self): plot = self.obj.plot_volcano( column="grouping1", group1="Healthy", group2="Disease", method="anova" ) - expected_y_value = 0.09437708068494619 + expected_y_value = 0.040890177695653236 y_value = plot.to_plotly_json().get("data")[0].get("y")[1] self.assertAlmostEqual(y_value, expected_y_value)
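
Reviewer note: below is a minimal usage sketch of the new `log2_transform` flag introduced in this diff. The loader/`DataSet` constructor keywords and file paths are illustrative assumptions taken from the package's documented API, not part of this change; only the `preprocess()` keywords shown here are touched by the diff.

```python
# Sketch only: exercises the new log2_transform option added in 0.4.3.
# File paths, loader/DataSet constructor kwargs and column names below are
# assumed for illustration; only the preprocess() keywords come from this diff.
from alphastats import DataSet, MaxQuantLoader  # assumed top-level exports

loader = MaxQuantLoader(file="proteinGroups.txt")  # hypothetical MaxQuant output
dataset = DataSet(
    loader=loader,
    metadata_path="metadata.xlsx",  # hypothetical metadata file
    sample_column="sample",         # hypothetical sample-name column
)

# New in this change: intensities are log2-transformed (np.log2(mat + 0.1)) by
# default; pass log2_transform=False to keep raw values, as the updated tests do.
dataset.preprocess(
    remove_contaminations=True,
    log2_transform=True,
    imputation="knn",
    normalization="zscore",
)
print(dataset.preprocessing_info)  # now records the log2 transformation alongside imputation/normalization
```

Setting `log2_transform=False` reproduces the pre-0.4.3 behaviour, which is why every existing `preprocess()` call in `tests/test_DataSet.py` was updated to pass it explicitly.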