diff --git a/.cookietemple.yml b/.cookietemple.yml index c9b3d1a0..97cf1b65 100644 --- a/.cookietemple.yml +++ b/.cookietemple.yml @@ -15,5 +15,5 @@ full_name: Victor Giurcoiu email: victor.giurcoiu@tum.de project_name: oktoberfest project_short_description: Public repo oktoberfest -version: 0.5.2 +version: 0.5.3 license: MIT diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml index 9ae5f091..92c40109 100644 --- a/.github/release-drafter.yml +++ b/.github/release-drafter.yml @@ -1,5 +1,5 @@ -name-template: "0.5.2 🌈" # <<COOKIETEMPLE_FORCE_BUMP>> -tag-template: 0.5.2 # <<COOKIETEMPLE_FORCE_BUMP>> +name-template: "0.5.3 🌈" # <<COOKIETEMPLE_FORCE_BUMP>> +tag-template: 0.5.3 # <<COOKIETEMPLE_FORCE_BUMP>> exclude-labels: - "skip-changelog" diff --git a/.github/workflows/sync_project.yml b/.github/workflows/sync_project.yml index ec8869da..9a7b64b8 100644 --- a/.github/workflows/sync_project.yml +++ b/.github/workflows/sync_project.yml @@ -25,9 +25,9 @@ jobs: - uses: oleksiyrudenko/gha-git-credentials@v2.1 with: - name: "victorgiurcoiu" - email: "victor.giurcoiu@tum.de" - actor: "victorgiurcoiu" + name: "Mario Picciani" + email: "mario.picciani@tum.de" + actor: "picciama" token: "${{ secrets.CT_SYNC_TOKEN}}" - name: Sync project diff --git a/LICENSE b/LICENSE index 01740824..ad0a6b19 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022, Victor Giurcoiu +Copyright (c) 2023, Wilhelmlab at Technical University of Munich Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/cookietemple.cfg b/cookietemple.cfg index cbfecac4..e9526cc7 100644 --- a/cookietemple.cfg +++ b/cookietemple.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.5.2 +current_version = 0.5.3 [bumpversion_files_whitelisted] init_file = oktoberfest/__init__.py diff --git a/docs/conf.py b/docs/conf.py index 9aa368e2..7c5772a1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -54,9 +54,9 @@ # the built documents. # # The short X.Y version. 
-version = "0.5.2" +version = "0.5.3" # The full version, including alpha/beta/rc tags. -release = "0.5.2" +release = "0.5.3" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/oktoberfest/__init__.py b/oktoberfest/__init__.py index 2bb70248..427eee98 100644 --- a/oktoberfest/__init__.py +++ b/oktoberfest/__init__.py @@ -5,7 +5,7 @@ __author__ = """The Oktoberfest development team (Wilhelmlab at Technical University of Munich)""" __copyright__ = f"Copyright {datetime.now():%Y}, Wilhelmlab at Technical University of Munich" __license__ = "MIT" -__version__ = "0.5.2" +__version__ = "0.5.3" import logging.handlers import sys diff --git a/oktoberfest/predict/koina.py b/oktoberfest/predict/koina.py index 713118fe..4b0bd7d5 100644 --- a/oktoberfest/predict/koina.py +++ b/oktoberfest/predict/koina.py @@ -361,7 +361,7 @@ def __async_callback( self, infer_results: Dict[int, Union[InferResult, InferenceServerException]], request_id: int, - result: InferResult, + result: Optional[InferResult], error: Optional[InferenceServerException], ): """ @@ -408,7 +408,13 @@ def __async_predict_batch( batch_outputs = self.__get_batch_outputs(self.model_outputs.keys()) batch_inputs = self.__get_batch_inputs(data) - for _ in range(retries): + for i in range(retries): + if i > 0: # need to yield first, before doing sth, but only after first time + yield + if isinstance(infer_results.get(request_id), InferResult): + break + del infer_results[request_id] # avoid race condition in case inference is slower than tqdm loop + self.client.async_infer( model_name=self.model_name, request_id=str(request_id), @@ -417,9 +423,6 @@ def __async_predict_batch( outputs=batch_outputs, client_timeout=timeout, ) - yield - if isinstance(infer_results.get(request_id), InferResult): - break def predict( self, @@ -492,23 +495,21 @@ def __predict_async( n_tasks = i + 1 with tqdm(total=n_tasks, desc="Getting predictions", 
disable=disable_progress_bar) as pbar: unfinished_tasks = [i for i in range(n_tasks)] - while pbar.n != n_tasks: + while pbar.n < n_tasks: time.sleep(0.2) new_unfinished_tasks = [] for j in unfinished_tasks: result = infer_results.get(j) if result is None: new_unfinished_tasks.append(j) - continue - if isinstance(result, InferenceServerException): + elif isinstance(result, InferResult): + pbar.n += 1 + else: # unexpected result / exception -> try again try: - new_unfinished_tasks.append(j) next(tasks[j]) + new_unfinished_tasks.append(j) except StopIteration: pbar.n += 1 - continue - if isinstance(result, InferResult): - pbar.n += 1 unfinished_tasks = new_unfinished_tasks pbar.refresh() diff --git a/oktoberfest/preprocessing/preprocessing.py b/oktoberfest/preprocessing/preprocessing.py index 0fe62f2b..1864b00b 100644 --- a/oktoberfest/preprocessing/preprocessing.py +++ b/oktoberfest/preprocessing/preprocessing.py @@ -247,6 +247,7 @@ def list_spectra(input_dir: Union[str, Path], file_format: str) -> List[Path]: :param file_format: Format of spectra files that match the file extension (case-insensitive), can be "mzML", "RAW" or "pkl". :raises NotADirectoryError: if the specified input directory does not exist :raises ValueError: if the specified file format is not supported + :raises AssertionError: if no files in the provided input directory match the provided file format :return: A list of paths to all spectra files found in the given directory """ if isinstance(input_dir, str): @@ -264,6 +265,12 @@ def list_spectra(input_dir: Union[str, Path], file_format: str) -> List[Path]: else: raise NotADirectoryError(f"{input_dir} does not exist.") + if not raw_files: + raise AssertionError( + f"There are no spectra files with the extension {file_format.lower()} in the provided input_dir {input_dir}. " + "Please check." 
+ ) + return raw_files diff --git a/pyproject.toml b/pyproject.toml index 8f24a495..e6fc47eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,8 @@ [tool.poetry] name = "oktoberfest" -version = "0.5.2" # <<COOKIETEMPLE_FORCE_BUMP>> +version = "0.5.3" # <<COOKIETEMPLE_FORCE_BUMP>> description = "Public repo oktoberfest" -authors = ["Victor Giurcoiu <victor.giurcoiu@tum.de>"] +authors = ["Wilhelmlab at Technical University of Munich"] license = "MIT" readme = "README.rst" homepage = "https://github.com/wilhelm-lab/oktoberfest" diff --git a/tests/unit_tests/test_pp.py b/tests/unit_tests/test_pp.py new file mode 100644 index 00000000..f04bef0d --- /dev/null +++ b/tests/unit_tests/test_pp.py @@ -0,0 +1,28 @@ +import unittest +from pathlib import Path + +from oktoberfest import pp + + +class TestProcessing(unittest.TestCase): + """Test class for preprocessing functions.""" + + def test_list_spectra(self): + """Test listing of spectra with expected user input.""" + spectra_path = Path(__file__).parent + spectra_file = spectra_path / "test.mzml" + spectra_file.open("w").close() + self.assertEqual([spectra_path / "test.mzml"], pp.list_spectra(spectra_path, file_format="mzml")) + spectra_file.unlink() + + def test_list_spectra_with_empty_string_folder(self): + """Test listing spectra in a string folder without matching files.""" + self.assertRaises(AssertionError, pp.list_spectra, str(Path(__file__).parent), "raw") + + def test_list_spectra_with_wrong_folder(self): + """Test listing spectra in a folder that does not exist.""" + self.assertRaises(NotADirectoryError, pp.list_spectra, Path(__file__).parent / "noexist", "raw") + + def test_list_spectra_with_wrong_format(self): + """Test listing spectra with a format that isn't allowed.""" + self.assertRaises(ValueError, pp.list_spectra, Path(__file__).parent, "mzm") diff --git a/tests/unit_tests/test_predictions.py b/tests/unit_tests/test_predictions.py index d329b9d1..e4aba786 100644 --- a/tests/unit_tests/test_predictions.py +++ b/tests/unit_tests/test_predictions.py @@ -37,3 +37,18 @@ def 
test_prosit_tmt(self): expected_df["PREDICTED_IRT"] = expected_df["PREDICTED_IRT"].astype(library.spectra_data["PREDICTED_IRT"].dtype) pd.testing.assert_frame_equal(library.spectra_data, expected_df) + + def test_failing_koina(self): + """Test koina with input data that does not fit to the model to trigger exception handling.""" + library = Spectra.from_csv(Path(__file__).parent / "data" / "predictions" / "library_input.csv") + input_data = library.spectra_data + + self.assertRaises( + Exception, + predict, + input_data, + model_name="Prosit_2020_intensity_HCD", + server_url="koina.proteomicsdb.org:443", + ssl=True, + targets=["intensities", "annotation"], + )