From 3f636edb400673f0f0e6e23909f87bcb5fbb2150 Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Mon, 18 Dec 2023 18:19:58 +0100 Subject: [PATCH 1/9] updated authors and actors --- .github/workflows/sync_project.yml | 6 +++--- LICENSE | 2 +- pyproject.toml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/sync_project.yml b/.github/workflows/sync_project.yml index ec8869da..9a7b64b8 100644 --- a/.github/workflows/sync_project.yml +++ b/.github/workflows/sync_project.yml @@ -25,9 +25,9 @@ jobs: - uses: oleksiyrudenko/gha-git-credentials@v2.1 with: - name: "victorgiurcoiu" - email: "victor.giurcoiu@tum.de" - actor: "victorgiurcoiu" + name: "Mario Picciani" + email: "mario.picciani@tum.de" + actor: "picciama" token: "${{ secrets.CT_SYNC_TOKEN}}" - name: Sync project diff --git a/LICENSE b/LICENSE index 01740824..ad0a6b19 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022, Victor Giurcoiu +Copyright (c) 2023, Wilhelmlab at Technical University of Munich Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/pyproject.toml b/pyproject.toml index 8f24a495..b1ccab19 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "oktoberfest" version = "0.5.2" # <> description = "Public repo oktoberfest" -authors = ["Victor Giurcoiu "] +authors = ["Wilhelmlab at Technical University of Munich"] license = "MIT" readme = "README.rst" homepage = "https://github.com/wilhelm-lab/oktoberfest" From 4e5a73d5c03c6bfdf58257cc4417c55820aafe5a Mon Sep 17 00:00:00 2001 From: Ludwig Lautenbacher Date: Thu, 21 Dec 2023 13:05:02 +0000 Subject: [PATCH 2/9] Fix endless loop in waiting logic of retry mechanism. --- oktoberfest/predict/koina.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/oktoberfest/predict/koina.py b/oktoberfest/predict/koina.py index 713118fe..13934a33 100644 --- a/oktoberfest/predict/koina.py +++ b/oktoberfest/predict/koina.py @@ -492,22 +492,20 @@ def __predict_async( n_tasks = i + 1 with tqdm(total=n_tasks, desc="Getting predictions", disable=disable_progress_bar) as pbar: unfinished_tasks = [i for i in range(n_tasks)] - while pbar.n != n_tasks: + while pbar.n < n_tasks: time.sleep(0.2) new_unfinished_tasks = [] for j in unfinished_tasks: result = infer_results.get(j) if result is None: new_unfinished_tasks.append(j) - continue - if isinstance(result, InferenceServerException): + elif isinstance(result, InferenceServerException): try: - new_unfinished_tasks.append(j) next(tasks[j]) + new_unfinished_tasks.append(j) except StopIteration: pbar.n += 1 - continue - if isinstance(result, InferResult): + elif isinstance(result, InferResult): pbar.n += 1 unfinished_tasks = new_unfinished_tasks From b08a87a521146e1f669b6f275c174589cfefa377 Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Thu, 21 Dec 2023 14:52:37 +0100 Subject: [PATCH 3/9] reorder tqdm logic --- oktoberfest/predict/koina.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/oktoberfest/predict/koina.py b/oktoberfest/predict/koina.py index 13934a33..b13daa4f 100644 --- a/oktoberfest/predict/koina.py +++ b/oktoberfest/predict/koina.py @@ -499,14 +499,14 @@ def __predict_async( result = infer_results.get(j) if result is None: new_unfinished_tasks.append(j) - elif isinstance(result, InferenceServerException): + elif isinstance(result, InferResult): + pbar.n += 1 + else: # unexpected result / exception -> try again try: next(tasks[j]) new_unfinished_tasks.append(j) except StopIteration: pbar.n += 1 - elif isinstance(result, InferResult): - pbar.n += 1 unfinished_tasks = new_unfinished_tasks pbar.refresh() From d3aae5051692d6973a9c09716566301583e29782 Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Thu, 21 Dec 2023 15:02:58 +0100 Subject: [PATCH 4/9] Bump version from 0.5.2 to 0.5.3 --- .cookietemple.yml | 2 +- .github/release-drafter.yml | 4 ++-- cookietemple.cfg | 2 +- docs/conf.py | 4 ++-- oktoberfest/__init__.py | 2 +- pyproject.toml | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.cookietemple.yml b/.cookietemple.yml index c9b3d1a0..97cf1b65 100644 --- a/.cookietemple.yml +++ b/.cookietemple.yml @@ -15,5 +15,5 @@ full_name: Victor Giurcoiu email: victor.giurcoiu@tum.de project_name: oktoberfest project_short_description: Public repo oktoberfest -version: 0.5.2 +version: 0.5.3 license: MIT diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml index 9ae5f091..92c40109 100644 --- a/.github/release-drafter.yml +++ b/.github/release-drafter.yml @@ -1,5 +1,5 @@ -name-template: "0.5.2 🌈" # <> -tag-template: 0.5.2 # <> +name-template: "0.5.3 🌈" # <> +tag-template: 0.5.3 # <> exclude-labels: - "skip-changelog" diff --git a/cookietemple.cfg b/cookietemple.cfg index cbfecac4..e9526cc7 100644 --- a/cookietemple.cfg +++ b/cookietemple.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.5.2 +current_version = 0.5.3 [bumpversion_files_whitelisted] init_file = oktoberfest/__init__.py diff --git a/docs/conf.py b/docs/conf.py index 9aa368e2..7c5772a1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -54,9 +54,9 @@ # the built documents. # # The short X.Y version. -version = "0.5.2" +version = "0.5.3" # The full version, including alpha/beta/rc tags. -release = "0.5.2" +release = "0.5.3" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/oktoberfest/__init__.py b/oktoberfest/__init__.py index 2bb70248..427eee98 100644 --- a/oktoberfest/__init__.py +++ b/oktoberfest/__init__.py @@ -5,7 +5,7 @@ __author__ = """The Oktoberfest development team (Wilhelmlab at Technical University of Munich)""" __copyright__ = f"Copyright {datetime.now():%Y}, Wilhelmlab at Technical University of Munich" __license__ = "MIT" -__version__ = "0.5.2" +__version__ = "0.5.3" import logging.handlers import sys diff --git a/pyproject.toml b/pyproject.toml index b1ccab19..e6fc47eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "oktoberfest" -version = "0.5.2" # <> +version = "0.5.3" # <> description = "Public repo oktoberfest" authors = ["Wilhelmlab at Technical University of Munich"] license = "MIT" From 98b19e84634872b159288dfe6a66810883c1973f Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Thu, 21 Dec 2023 15:14:38 +0100 Subject: [PATCH 5/9] check and fail if no spectra files matched --- oktoberfest/preprocessing/preprocessing.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/oktoberfest/preprocessing/preprocessing.py b/oktoberfest/preprocessing/preprocessing.py index 0fe62f2b..1864b00b 100644 --- a/oktoberfest/preprocessing/preprocessing.py +++ b/oktoberfest/preprocessing/preprocessing.py @@ -247,6 +247,7 @@ def list_spectra(input_dir: Union[str, Path], file_format: str) -> List[Path]: :param file_format: Format of spectra files that match the file extension (case-insensitive), can be "mzML", "RAW" or "pkl". :raises NotADirectoryError: if the specified input directory does not exist :raises ValueError: if the specified file format is not supported + :raises AssertionError: if no files in the provided input directory match the provided file format :return: A list of paths to all spectra files found in the given directory """ if isinstance(input_dir, str): @@ -264,6 +265,12 @@ def list_spectra(input_dir: Union[str, Path], file_format: str) -> List[Path]: else: raise NotADirectoryError(f"{input_dir} does not exist.") + if not raw_files: + raise AssertionError( + f"There are no spectra files with the extension {file_format.lower()} in the provided input_dir {input_dir}. " + "Please check." + ) + return raw_files From 47cdf076d386dea1570bbd72bdac1383e355a8cf Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Thu, 21 Dec 2023 16:47:07 +0100 Subject: [PATCH 6/9] added tests for list_spectra with various inputs --- tests/unit_tests/test_pp.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 tests/unit_tests/test_pp.py diff --git a/tests/unit_tests/test_pp.py b/tests/unit_tests/test_pp.py new file mode 100644 index 00000000..f04bef0d --- /dev/null +++ b/tests/unit_tests/test_pp.py @@ -0,0 +1,28 @@ +import unittest +from pathlib import Path + +from oktoberfest import pp + + +class TestProcessing(unittest.TestCase): + """Test class for preprocessing functions.""" + + def test_list_spectra(self): + """Test listing of spectra with expected user input.""" + spectra_path = Path(__file__).parent + spectra_file = spectra_path / "test.mzml" + spectra_file.open("w").close() + self.assertEqual([spectra_path / "test.mzml"], pp.list_spectra(spectra_path, file_format="mzml")) + spectra_file.unlink() + + def test_list_spectra_with_empty_string_folder(self): + """Test listing spectra in a string folder without matching files.""" + self.assertRaises(AssertionError, pp.list_spectra, str(Path(__file__).parent), "raw") + + def test_list_spectra_with_wrong_folder(self): + """Test listing spectra in a folder that does not exist.""" + self.assertRaises(NotADirectoryError, pp.list_spectra, Path(__file__).parent / "noexist", "raw") + + def test_list_spectra_with_wrong_format(self): + """Test listing spectra with a format that isn't allowed.""" + self.assertRaises(ValueError, pp.list_spectra, Path(__file__).parent, "mzm") From 1d983c340d9c0f3cbbe8ac05c0b5b3d95743604c Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Thu, 21 Dec 2023 16:58:59 +0100 Subject: [PATCH 7/9] added test for failing koina prediction --- tests/unit_tests/test_predictions.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/unit_tests/test_predictions.py b/tests/unit_tests/test_predictions.py index d329b9d1..e4aba786 100644 --- a/tests/unit_tests/test_predictions.py +++ b/tests/unit_tests/test_predictions.py @@ -37,3 +37,18 @@ def test_prosit_tmt(self): expected_df["PREDICTED_IRT"] = expected_df["PREDICTED_IRT"].astype(library.spectra_data["PREDICTED_IRT"].dtype) pd.testing.assert_frame_equal(library.spectra_data, expected_df) + + def test_failing_koina(self): + """Test koina with input data that does not fit to the model to trigger exception handling.""" + library = Spectra.from_csv(Path(__file__).parent / "data" / "predictions" / "library_input.csv") + input_data = library.spectra_data + + self.assertRaises( + Exception, + predict, + input_data, + model_name="Prosit_2020_intensity_HCD", + server_url="koina.proteomicsdb.org:443", + ssl=True, + targets=["intensities", "annotation"], + ) From dd676d2aa8b1006124736471faf3c4eda2003dd4 Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Thu, 21 Dec 2023 17:42:14 +0100 Subject: [PATCH 8/9] fix typeguard and race condition in tqdm loop --- oktoberfest/predict/koina.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/oktoberfest/predict/koina.py b/oktoberfest/predict/koina.py index b13daa4f..6df94e32 100644 --- a/oktoberfest/predict/koina.py +++ b/oktoberfest/predict/koina.py @@ -361,7 +361,7 @@ def __async_callback( self, infer_results: Dict[int, Union[InferResult, InferenceServerException]], request_id: int, - result: InferResult, + result: Optional[InferResult], error: Optional[InferenceServerException], ): """ @@ -503,6 +503,7 @@ def __predict_async( pbar.n += 1 else: # unexpected result / exception -> try again try: + del infer_results[j] # avoid race condition in case inference is slower than loop next(tasks[j]) new_unfinished_tasks.append(j) except StopIteration: From 3300f37a46ebe5ebd9b2f2c30d444fb2a4778cfc Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Thu, 21 Dec 2023 18:02:20 +0100 Subject: [PATCH 9/9] fix the order of yield and async infer --- oktoberfest/predict/koina.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/oktoberfest/predict/koina.py b/oktoberfest/predict/koina.py index 6df94e32..4b0bd7d5 100644 --- a/oktoberfest/predict/koina.py +++ b/oktoberfest/predict/koina.py @@ -408,7 +408,13 @@ def __async_predict_batch( batch_outputs = self.__get_batch_outputs(self.model_outputs.keys()) batch_inputs = self.__get_batch_inputs(data) - for _ in range(retries): + for i in range(retries): + if i > 0: # need to yield first, before doing sth, but only after first time + yield + if isinstance(infer_results.get(request_id), InferResult): + break + del infer_results[request_id] # avoid race condition in case inference is slower than tqdm loop + self.client.async_infer( model_name=self.model_name, request_id=str(request_id), @@ -417,9 +423,6 @@ def __async_predict_batch( outputs=batch_outputs, client_timeout=timeout, ) - yield - if isinstance(infer_results.get(request_id), InferResult): - break def predict( self, @@ -503,7 +506,6 @@ def __predict_async( pbar.n += 1 else: # unexpected result / exception -> try again try: - del infer_results[j] # avoid race condition in case inference is slower than loop next(tasks[j]) new_unfinished_tasks.append(j) except StopIteration: