diff --git a/models/Prosit/Prosit_2023_intensity_TOF/1/.donotdelete b/models/Prosit/Prosit_2023_intensity_TOF/1/.donotdelete new file mode 100644 index 00000000..e69de29b diff --git a/models/Prosit/Prosit_2023_intensity_TOF/config.pbtxt b/models/Prosit/Prosit_2023_intensity_TOF/config.pbtxt new file mode 100644 index 00000000..af79869b --- /dev/null +++ b/models/Prosit/Prosit_2023_intensity_TOF/config.pbtxt @@ -0,0 +1,133 @@ +max_batch_size: 1000 +platform: "ensemble" +input [ + { + name: 'peptide_sequences', + data_type: TYPE_STRING, + dims: [-1] + }, + { + name: 'precursor_charges', + data_type: TYPE_INT32, + dims: [1], + }, + { + name: 'collision_energies', + data_type: TYPE_FP32, + dims: [1], + } +] +output [ + { + name: 'intensities', + data_type: TYPE_FP32, + dims: [174] + }, + { + name: 'mz', + data_type: TYPE_FP32, + dims: [174] + }, + { + name: 'annotation', + data_type: TYPE_STRING, + dims: [174] + } +] + +ensemble_scheduling { + step [ + { + model_name: "Prosit_Preprocess_charge" + model_version: 1 + input_map { + key: "precursor_charges" + value: "precursor_charges" + }, + output_map { + key: "precursor_charges_in:0" + value: "precursor_charges_in_preprocessed:0" + } + }, + { + model_name: "Prosit_Preprocess_peptide" + model_version: 1 + input_map { + key: "peptide_sequences" + value: "peptide_sequences" + }, + output_map { + key: "peptides_in:0" + value: "peptides_in:0" + } + }, + { + model_name: "Prosit_Preprocess_collision_energy" + model_version: 1 + input_map { + key: "raw_collision_energy" + value: "collision_energies" + }, + output_map { + key: "norm_collision_energy" + value: "norm_collision_energy" + } + }, + { + model_name: "Prosit_2023_intensity_TOF_core" + model_version: 1 + input_map { + key: "peptides_in" + value: "peptides_in:0" + }, + input_map { + key: "collision_energy_in" + value: "norm_collision_energy" + }, + input_map { + key: "precursor_charge_in" + value: "precursor_charges_in_preprocessed:0" + } + output_map { + key: "out" + 
value: "out/Reshape:0" + } + }, + { + model_name: "Prosit_2019_intensity_postprocess" + model_version: 1 + input_map { + key: "peptides_in:0" + value: "peptide_sequences" + }, + input_map{ + key: "precursor_charges_in:0" + value: "precursor_charges_in_preprocessed:0" + } + input_map{ + key: "peaks_in:0", + value: "out/Reshape:0" + } + output_map { + key: "intensities" + value: "intensities" + } + output_map { + key: "mz" + value: "mz" + } + }, + { + model_name: "Prosit_Helper_annotation" + model_version: 1 + input_map { + key: "precursor_charges" + value: "precursor_charges" + }, + output_map { + key: "annotation" + value: "annotation" + } + } + ] +} diff --git a/models/Prosit/Prosit_2023_intensity_TOF/notes.yaml b/models/Prosit/Prosit_2023_intensity_TOF/notes.yaml new file mode 100644 index 00000000..048aaf78 --- /dev/null +++ b/models/Prosit/Prosit_2023_intensity_TOF/notes.yaml @@ -0,0 +1,37 @@ +description: | + The HCD Prosit 2020 model was fine-tuned using 277,781 MS/MS spectra of both tryptic and non-tryptic synthesized peptides measured on a timsTOF Pro. The model architecture remained unchanged. The data was split into three distinct sets with each peptide and subsequence of a peptide only included in one of the three: training (80%, 153,809 tryptic PSMs and 77,577 non-tryptic PSMs), validation (10%, 16,483 tryptic PSMs and 7,778 non-tryptic PSMs), and test (10%, 14,262 tryptic PSMs and 7,872 non-tryptic PSMs). + + For this project, over 300,000 non-tryptic peptides from the ProteomeTools project were measured. Our measurements encompassed a range of collision energies from 20.81 eV to 69.77 eV. The data was analyzed using MaxQuant version 2.1.2.0 with carbamidomethylated cysteine specified as a fixed modification and methionine oxidation as a variable modification. 
+ + The HCD Prosit 2020 model was originally trained on approximately 30 million MS/MS spectra, consisting of 9 million MS/MS spectra of non-tryptic peptides and 21 million previously published tryptic MS/MS spectra. The comparison between the HCD Prosit 2020 model and the newly developed TOF Prosit 2023 model reveals a substantial improvement in normalized spectral contrast angle (SA) between predicted and experimental timsTOF MS/MS spectra for both non-tryptic and tryptic peptides. The TOF Prosit 2023 model achieved a SA ≥ 0.9 for 26.3% of non-tryptic spectra (compared to 2.4% with HCD Prosit 2020) and 42.1% of tryptic spectra (compared to 0.2% with HCD Prosit 2020). + + The TOF Prosit 2023 model demonstrates consistent performance across different precursor charges, peptide lengths, and collision energies, with minimal bias towards C- and N-terminal amino acids. Both the tryptic and non-tryptic timsTOF data are available via PRIDE, with the identifiers PXD019086 and PXD043844, respectively. 
+ +citation: | + Fragment ion intensity prediction improves the identification rate of non-tryptic peptides in TimsTOF + Charlotte Adams, Wassim Gabriel, Kris Laukens, Mathias Wilhelm, Wout Bittremieux, Kurt Boonen + bioRxiv 2023.07.17.549401; doi: https://doi.org/10.1101/2023.07.17.549401 +tag: "Intensity" +tag_url: "https://www.proteomicsdb.org/" +examples: + inputs: + [ + { + "name": "peptide_sequences", + "httpdtype": "BYTES", + "shape": "[2,1]", + "data": '["AAAAAKAK", "AAAAAKAK"]' + }, + { + "name": "precursor_charges", + "httpdtype": "INT32", + "shape": "[2,1]", + "data": '[1,2]' + }, + { + "name": "collision_energies", + "httpdtype": "FP32", + "shape": "[2,1]", + "data": '[25, 25]' + } + ] \ No newline at end of file diff --git a/models/Prosit/Prosit_2023_intensity_TOF_core/1/.zenodo b/models/Prosit/Prosit_2023_intensity_TOF_core/1/.zenodo new file mode 100644 index 00000000..4be20050 --- /dev/null +++ b/models/Prosit/Prosit_2023_intensity_TOF_core/1/.zenodo @@ -0,0 +1 @@ +https://zenodo.org/record/8211811/files/model.savedmodel.zip?download=1 diff --git a/models/Prosit/Prosit_2023_intensity_TOF_core/config.pbtxt b/models/Prosit/Prosit_2023_intensity_TOF_core/config.pbtxt new file mode 100644 index 00000000..09ca435b --- /dev/null +++ b/models/Prosit/Prosit_2023_intensity_TOF_core/config.pbtxt @@ -0,0 +1 @@ +max_batch_size: 1000 \ No newline at end of file diff --git a/test/Prosit/arr_Prosit_2023_intensity_TOF_ce.npy b/test/Prosit/arr_Prosit_2023_intensity_TOF_ce.npy new file mode 100644 index 00000000..ceb91364 Binary files /dev/null and b/test/Prosit/arr_Prosit_2023_intensity_TOF_ce.npy differ diff --git a/test/Prosit/arr_Prosit_2023_intensity_TOF_ce_norm.npy b/test/Prosit/arr_Prosit_2023_intensity_TOF_ce_norm.npy new file mode 100644 index 00000000..acf4f27d Binary files /dev/null and b/test/Prosit/arr_Prosit_2023_intensity_TOF_ce_norm.npy differ diff --git a/test/Prosit/arr_Prosit_2023_intensity_TOF_charge.npy 
b/test/Prosit/arr_Prosit_2023_intensity_TOF_charge.npy new file mode 100644 index 00000000..a344e7d2 Binary files /dev/null and b/test/Prosit/arr_Prosit_2023_intensity_TOF_charge.npy differ diff --git a/test/Prosit/arr_Prosit_2023_intensity_TOF_charge_onehot.npy b/test/Prosit/arr_Prosit_2023_intensity_TOF_charge_onehot.npy new file mode 100644 index 00000000..326e77c0 Binary files /dev/null and b/test/Prosit/arr_Prosit_2023_intensity_TOF_charge_onehot.npy differ diff --git a/test/Prosit/arr_Prosit_2023_intensity_TOF_int.npy b/test/Prosit/arr_Prosit_2023_intensity_TOF_int.npy new file mode 100644 index 00000000..09cf54d8 Binary files /dev/null and b/test/Prosit/arr_Prosit_2023_intensity_TOF_int.npy differ diff --git a/test/Prosit/arr_Prosit_2023_intensity_TOF_int_raw.npy b/test/Prosit/arr_Prosit_2023_intensity_TOF_int_raw.npy new file mode 100644 index 00000000..a21da0ee Binary files /dev/null and b/test/Prosit/arr_Prosit_2023_intensity_TOF_int_raw.npy differ diff --git a/test/Prosit/arr_Prosit_2023_intensity_TOF_seq.npy b/test/Prosit/arr_Prosit_2023_intensity_TOF_seq.npy new file mode 100644 index 00000000..01c30594 Binary files /dev/null and b/test/Prosit/arr_Prosit_2023_intensity_TOF_seq.npy differ diff --git a/test/Prosit/arr_Prosit_2023_intensity_TOF_seq_encoding.npy b/test/Prosit/arr_Prosit_2023_intensity_TOF_seq_encoding.npy new file mode 100644 index 00000000..a5752fc2 Binary files /dev/null and b/test/Prosit/arr_Prosit_2023_intensity_TOF_seq_encoding.npy differ diff --git a/test/Prosit/test_Prosit_2023_intensity_TOF.py b/test/Prosit/test_Prosit_2023_intensity_TOF.py new file mode 100644 index 00000000..3d80b06e --- /dev/null +++ b/test/Prosit/test_Prosit_2023_intensity_TOF.py @@ -0,0 +1,64 @@ +from test.server_config import SERVER_GRPC, SERVER_HTTP +import tritonclient.grpc as grpcclient +import numpy as np +from pathlib import Path +import requests + +# To ensure MODEL_NAME == test_.py +MODEL_NAME = Path(__file__).stem.replace("test_", "") + + +def 
test_available_http(): + req = requests.get(f"{SERVER_HTTP}/v2/models/{MODEL_NAME}", timeout=1) + assert req.status_code == 200 + + +def test_available_grpc(): + triton_client = grpcclient.InferenceServerClient(url=SERVER_GRPC) + assert triton_client.is_model_ready(MODEL_NAME) + + +def test_inference(): + SEQUENCES = np.load( + "test/Prosit/arr_Prosit_2023_intensity_TOF_seq.npy", allow_pickle=True + ) + charge = np.load("test/Prosit/arr_Prosit_2023_intensity_TOF_charge.npy") + ces = np.load("test/Prosit/arr_Prosit_2023_intensity_TOF_ce.npy") + + triton_client = grpcclient.InferenceServerClient(url=SERVER_GRPC) + + in_pep_seq = grpcclient.InferInput("peptide_sequences", SEQUENCES.shape, "BYTES") + in_pep_seq.set_data_from_numpy(SEQUENCES) + + in_charge = grpcclient.InferInput("precursor_charges", charge.shape, "INT32") + in_charge.set_data_from_numpy(charge) + + in_ces = grpcclient.InferInput("collision_energies", ces.shape, "FP32") + in_ces.set_data_from_numpy(ces) + + result = triton_client.infer( + MODEL_NAME, + inputs=[in_pep_seq, in_charge, in_ces], + outputs=[ + grpcclient.InferRequestedOutput("intensities"), + grpcclient.InferRequestedOutput("mz"), + grpcclient.InferRequestedOutput("annotation"), + ], + ) + + intensities = result.as_numpy("intensities") + fragmentmz = result.as_numpy("mz") + annotation = result.as_numpy("annotation") + + assert intensities.shape == (SEQUENCES.shape[0], 174) + assert fragmentmz.shape == (SEQUENCES.shape[0], 174) + assert annotation.shape == (SEQUENCES.shape[0], 174) + + # Assert intensities consistent + assert np.allclose( + intensities, + np.load("test/Prosit/arr_Prosit_2023_intensity_TOF_int.npy"), + rtol=0, + atol=1e-5, + equal_nan=True, + ) diff --git a/test/Prosit/test_Prosit_2023_intensity_TOF_core.py b/test/Prosit/test_Prosit_2023_intensity_TOF_core.py new file mode 100644 index 00000000..8ab8bfae --- /dev/null +++ b/test/Prosit/test_Prosit_2023_intensity_TOF_core.py @@ -0,0 +1,52 @@ +from test.server_config import 
SERVER_GRPC, SERVER_HTTP +import tritonclient.grpc as grpcclient +import numpy as np +from pathlib import Path +import requests + +# To ensure MODEL_NAME == test_.py +MODEL_NAME = Path(__file__).stem.replace("test_", "") + + +def test_available_http(): + req = requests.get(f"{SERVER_HTTP}/v2/models/{MODEL_NAME}", timeout=1) + assert req.status_code == 200 + + +def test_available_grpc(): + triton_client = grpcclient.InferenceServerClient(url=SERVER_GRPC) + assert triton_client.is_model_ready(MODEL_NAME) + + +def test_inference(): + seq = np.load("test/Prosit/arr_Prosit_2023_intensity_TOF_seq_encoding.npy") + charge = np.load("test/Prosit/arr_Prosit_2023_intensity_TOF_charge_onehot.npy") + ces = np.load("test/Prosit/arr_Prosit_2023_intensity_TOF_ce_norm.npy") + + triton_client = grpcclient.InferenceServerClient(url=SERVER_GRPC) + + in_pep_seq = grpcclient.InferInput("peptides_in", seq.shape, "INT32") + in_pep_seq.set_data_from_numpy(seq) + + in_charge = grpcclient.InferInput("precursor_charge_in", charge.shape, "FP32") + in_charge.set_data_from_numpy(charge) + + in_ces = grpcclient.InferInput("collision_energy_in", ces.shape, "FP32") + in_ces.set_data_from_numpy(ces) + + result = triton_client.infer( + MODEL_NAME, + inputs=[in_pep_seq, in_charge, in_ces], + outputs=[ + grpcclient.InferRequestedOutput("out"), + ], + ) + + intensities = result.as_numpy("out") + + assert np.allclose( + intensities, + np.load("test/Prosit/arr_Prosit_2023_intensity_TOF_int_raw.npy"), + rtol=0, + atol=1e-4, + ) diff --git a/web/README.md b/web/README.md index a0d45a99..77753457 100644 --- a/web/README.md +++ b/web/README.md @@ -3,6 +3,10 @@ Koina landing page and documentation built with [Nuxt](https://nuxt.com) and [RapiDoc](https://rapidocweb.com/index.html) +## OpenAPI + +Regenerate `public/openapi.yml` by running `./web/openapi/openapi_gen.py` + ## Development Server Make sure to install the dependencies first diff --git a/web/openapi/openapi_gen.py b/web/openapi/openapi_gen.py 
index aec06349..1c2c5957 100755 --- a/web/openapi/openapi_gen.py +++ b/web/openapi/openapi_gen.py @@ -57,6 +57,8 @@ def sleep_until_service_starts(http_server): def get_config(http_url, name): + # TODO throw an error when an unknown model is requested + # {'error': "Request for unknown model: 'Deeplc_hela_hf' is not found"} url = http_url + f"/v2/models/{name}/config" logging.info(f"Getting config from:\t\t{url}") r = requests.get(url, timeout=1) @@ -105,6 +107,12 @@ def main(http_url, grpc_url, tmpl_url): models[-1]["note"]["description"] = models[-1]["note"]["description"].replace( "\n", "
" ) + try: + models[-1]["note"]["citation"] = models[-1]["note"]["citation"].replace( + "\n", "
" + ) + except KeyError: + logging.warning(f"Model {name} does not contain a citation") add_np_and_openapi_dtype(models[-1]["note"]) copy_outputs_to_note(models[-1]) verify_inputs(models[-1]) diff --git a/web/openapi/templates/openapi.yml b/web/openapi/templates/openapi.yml index 65390902..f5dde2d6 100644 --- a/web/openapi/templates/openapi.yml +++ b/web/openapi/templates/openapi.yml @@ -38,12 +38,9 @@ externalDocs: servers: - url: {{tmpl_url}}/v2/models tags: -{% for model in models %} - - name: {{model.note.tag}} - externalDocs: - description: Find out more - url: {{model.note.tag_url}} -{% endfor %} + - name: Retention Time + - name: Intensity + - name: Collisional cross section paths: {% for model in models %} /{{model.name}}/infer: @@ -52,11 +49,24 @@ paths: - {{model.note.tag}} summary: {{model.name}} description: | + **Summary** + {{model.note.description }} + + **Citation** + + {{model.note.citation }} + + **Examples** + +
+ Python using GRPC + ```python {{model.code}} ``` - " + +
operationId: {{model.name}} requestBody: description: Body diff --git a/web/public/openapi.yml b/web/public/openapi.yml index 3b18ce1a..c7801b7a 100644 --- a/web/public/openapi.yml +++ b/web/public/openapi.yml @@ -38,62 +38,9 @@ externalDocs: servers: - url: https://koina.proteomicsdb.org/v2/models tags: - - name: Retention Time - externalDocs: - description: Find out more - url: https://www.proteomicsdb.org/ - - name: Intensity - externalDocs: - description: Find out more - url: https://www.proteomicsdb.org/ - - name: Collisional cross section - externalDocs: - description: Find out more - url: https://www.proteomicsdb.org/ - - - name: Intensity - externalDocs: - description: Find out more - url: https://www.proteomicsdb.org/ - - - name: Retention Time - externalDocs: - description: Find out more - url: https://www.proteomicsdb.org/ - - - name: Retention Time - externalDocs: - description: Find out more - url: https://www.proteomicsdb.org/ - - - name: Intensity - externalDocs: - description: Find out more - url: https://www.proteomicsdb.org/ - - - name: Intensity - externalDocs: - description: Find out more - url: https://www.proteomicsdb.org/ - - - name: Intensity - externalDocs: - description: Find out more - url: https://www.proteomicsdb.org/ - - - name: Intensity - externalDocs: - description: Find out more - url: https://www.proteomicsdb.org/ - - - name: Retention Time - externalDocs: - description: Find out more - url: https://www.proteomicsdb.org/ - paths: /Deeplc_hela_hf/infer: @@ -102,7 +49,19 @@ paths: - Retention Time summary: Deeplc_hela_hf description: | + **Summary** + Find out more about this model here.

If you use predictions generated by this model please cite the following paper.

DeepLC can predict retention times for peptides that carry as-yet unseen modifications
Robbin Bouwmeester, Ralf Gabriels, Niels Hulstaert, Lennart Martens & Sven Degroeve
Nature Methods 18, 1363–1369 (2021) doi: 10.1038/s41592-021-01301-5
+ + **Citation** + + + + **Examples** + +
+ Python using GRPC + ```python import numpy as np import time @@ -155,7 +114,8 @@ paths: print(key) print(predictions[key]) ``` - " + +
operationId: Deeplc_hela_hf requestBody: description: Body @@ -176,7 +136,19 @@ paths: - Intensity summary: ms2pip_2021_HCD description: | + **Summary** + Find out more about this model here.

If you use predictions generated by this model please cite the following paper.

Gabriels, R., Martens, L., & Degroeve, S. (2019). Updated MS²PIP web server delivers fast and accurate MS² peak intensity prediction for multiple fragmentation methods, instruments and labeling techniques. Nucleic Acids Research doi:10.1093/nar/gkz299

Prior MS²PIP publications:

Degroeve, S., Maddelein, D., & Martens, L. (2015). MS²PIP prediction server: compute and visualize MS² peak intensity predictions for CID and HCD fragmentation. Nucleic Acids Research, 43(W1), W326–W330. doi:10.1093/nar/gkv542

Degroeve, S., & Martens, L. (2013). MS²PIP: a tool for MS/MS peak intensity prediction. Bioinformatics (Oxford, England), 29(24), 3199–203. doi:10.1093/bioinformatics/btt544
+ + **Citation** + + + + **Examples** + +
+ Python using GRPC + ```python import numpy as np import time @@ -230,7 +202,8 @@ paths: print(key) print(predictions[key]) ``` - " + +
operationId: ms2pip_2021_HCD requestBody: description: Body @@ -251,7 +224,19 @@ paths: - Collisional cross section summary: AlphaPept_ccs_generic description: | + **Summary** + Find out more about this model here.

If you use predictions generated by this model please cite the following paper.

Wen-Feng Zeng, Xie-Xuan Zhou, Sander Willems, Constantin Ammar, Maria Wahle, Isabell Bludau, Eugenia Voytik, Maximillian T. Strauss & Matthias Mann. AlphaPeptDeep: a modular deep learning framework to predict peptide properties for proteomics. Nat Commun 13, 7238 (2022). https://doi.org/10.1038/s41467-022-34904-3
+ + **Citation** + + + + **Examples** + +
+ Python using GRPC + ```python import numpy as np import time @@ -305,7 +290,8 @@ paths: print(key) print(predictions[key]) ``` - " + +
operationId: AlphaPept_ccs_generic requestBody: description: Body @@ -326,7 +312,19 @@ paths: - Intensity summary: AlphaPept_ms2_generic description: | + **Summary** + Find out more about this model here.

If you use predictions generated by this model please cite the following paper.

Wen-Feng Zeng, Xie-Xuan Zhou, Sander Willems, Constantin Ammar, Maria Wahle, Isabell Bludau, Eugenia Voytik, Maximillian T. Strauss & Matthias Mann. AlphaPeptDeep: a modular deep learning framework to predict peptide properties for proteomics. Nat Commun 13, 7238 (2022). https://doi.org/10.1038/s41467-022-34904-3
+ + **Citation** + + + + **Examples** + +
+ Python using GRPC + ```python import numpy as np import time @@ -382,7 +380,8 @@ paths: print(key) print(predictions[key]) ``` - " + +
operationId: AlphaPept_ms2_generic requestBody: description: Body @@ -403,7 +402,19 @@ paths: - Retention Time summary: AlphaPept_rt_generic description: | + **Summary** + Find out more about this model here.

If you use predictions generated by this model please cite the following paper.

Wen-Feng Zeng, Xie-Xuan Zhou, Sander Willems, Constantin Ammar, Maria Wahle, Isabell Bludau, Eugenia Voytik, Maximillian T. Strauss & Matthias Mann. AlphaPeptDeep: a modular deep learning framework to predict peptide properties for proteomics. Nat Commun 13, 7238 (2022). https://doi.org/10.1038/s41467-022-34904-3
+ + **Citation** + + + + **Examples** + +
+ Python using GRPC + ```python import numpy as np import time @@ -456,7 +467,8 @@ paths: print(key) print(predictions[key]) ``` - " + +
operationId: AlphaPept_rt_generic requestBody: description: Body @@ -477,7 +489,19 @@ paths: - Retention Time summary: Prosit_2019_irt description: | + **Summary** + Find out more about this model here.

If you use predictions generated by this model please cite the following paper.

Wilhelm, M., Zolg, D.P., Graber, M. et al. Deep learning boosts sensitivity of mass spectrometry-based immunopeptidomics. Nat Commun 12, 3346 (2021). https://doi.org/10.1038/s41467-021-23713-9
+ + **Citation** + + + + **Examples** + +
+ Python using GRPC + ```python import numpy as np import time @@ -530,7 +554,8 @@ paths: print(key) print(predictions[key]) ``` - " + +
operationId: Prosit_2019_irt requestBody: description: Body @@ -551,7 +576,19 @@ paths: - Intensity summary: Prosit_2019_intensity description: | + **Summary** + Find out more about this model here.

If you use predictions generated by this model please cite the following paper.

Prosit offers high quality MS2 predicted spectra for any organism and protease as well as iRT prediction. When using Prosit is helpful for your research, please cite "Gessulat, Schmidt et al. 2019" DOI 10.1038/s41592-019-0426-7
+ + **Citation** + + + + **Examples** + +
+ Python using GRPC + ```python import numpy as np import time @@ -606,7 +643,8 @@ paths: print(key) print(predictions[key]) ``` - " + +
operationId: Prosit_2019_intensity requestBody: description: Body @@ -627,7 +665,19 @@ paths: - Intensity summary: Prosit_2020_intensity_CID description: | + **Summary** + Find out more about this model here.

If you use predictions generated by this model please cite the following paper.

Wilhelm, M., Zolg, D.P., Graber, M. et al. Deep learning boosts sensitivity of mass spectrometry-based immunopeptidomics. Nat Commun 12, 3346 (2021). https://doi.org/10.1038/s41467-021-23713-9
+ + **Citation** + + + + **Examples** + +
+ Python using GRPC + ```python import numpy as np import time @@ -681,7 +731,8 @@ paths: print(key) print(predictions[key]) ``` - " + +
operationId: Prosit_2020_intensity_CID requestBody: description: Body @@ -702,7 +753,19 @@ paths: - Intensity summary: Prosit_2020_intensity_TMT description: | + **Summary** + Find out more about this model here.

If you use predictions generated by this model please cite the following paper.

Wassim Gabriel, Matthew The, Daniel P. Zolg, Florian P. Bayer, Omar Shouman, Ludwig Lautenbacher, Karsten Schnatbaum, Johannes Zerweck, Tobias Knaute, Bernard Delanghe, Andreas Huhmer, Holger Wenschuh, Ulf Reimer, Guillaume Médard, Bernhard Kuster, and Mathias Wilhelm
Analytical Chemistry 2022 94 (20), 7181-7190
DOI: 10.1021/acs.analchem.1c05435
+ + **Citation** + + + + **Examples** + +
+ Python using GRPC + ```python import numpy as np import time @@ -758,7 +821,8 @@ paths: print(key) print(predictions[key]) ``` - " + +
operationId: Prosit_2020_intensity_TMT requestBody: description: Body @@ -779,7 +843,19 @@ paths: - Intensity summary: Prosit_2020_intensity_HCD description: | + **Summary** + Find out more about this model here.

If you use predictions generated by this model please cite the following paper.

Wilhelm, M., Zolg, D.P., Graber, M. et al. Deep learning boosts sensitivity of mass spectrometry-based immunopeptidomics. Nat Commun 12, 3346 (2021). https://doi.org/10.1038/s41467-021-23713-9
+ + **Citation** + + + + **Examples** + +
+ Python using GRPC + ```python import numpy as np import time @@ -834,7 +910,8 @@ paths: print(key) print(predictions[key]) ``` - " + +
operationId: Prosit_2020_intensity_HCD requestBody: description: Body @@ -849,13 +926,114 @@ paths: '405': description: Invalid input + /Prosit_2023_intensity_TOF/infer: + post: + tags: + - Intensity + summary: Prosit_2023_intensity_TOF + description: | + **Summary** + + The HCD Prosit 2020 model was fine-tuned using 277,781 MS/MS spectra of both tryptic and non-tryptic synthesized peptides measured on a timsTOF Pro. The model architecture remained unchanged. The data was split into three distinct sets with each peptide and subsequence of a peptide only included in one of the three: training (80%, 153,809 tryptic PSMs and 77,577 non-tryptic PSMs), validation (10%, 16,483 tryptic PSMs and 7,778 non-tryptic PSMs), and test (10%, 14,262 tryptic PSMs and 7,872 non-tryptic PSMs).

For this project, over 300,000 non-tryptic peptides from the ProteomeTools project were measured. Our measurements encompassed a range of collision energies from 20.81 eV to 69.77 eV. The data was analyzed using MaxQuant version 2.1.2.0 with carbamidomethylated cysteine specified as a fixed modification and methionine oxidation as a variable modification.

The HCD Prosit 2020 model was originally trained on approximately 30 million MS/MS spectra, consisting of 9 million MS/MS spectra of non-tryptic peptides and 21 million previously published tryptic MS/MS spectra. The comparison between the HCD Prosit 2020 model and the newly developed TOF Prosit 2023 model reveals a substantial improvement in normalized spectral contrast angle (SA) between predicted and experimental timsTOF MS/MS spectra for both non-tryptic and tryptic peptides. The TOF Prosit 2023 model achieved a SA ≥ 0.9 for 26.3% of non-tryptic spectra (compared to 2.4% with HCD Prosit 2020) and 42.1% of tryptic spectra (compared to 0.2% with HCD Prosit 2020).

The TOF Prosit 2023 model demonstrates consistent performance across different precursor charges, peptide lengths, and collision energies, with minimal bias towards C- and N-terminal amino acids. Both the tryptic and non-tryptic timsTOF data are available via PRIDE, with the identifiers PXD019086 and PXD043844, respectively.
+ + **Citation** + + Fragment ion intensity prediction improves the identification rate of non-tryptic peptides in TimsTOF
Charlotte Adams, Wassim Gabriel, Kris Laukens, Mathias Wilhelm, Wout Bittremieux, Kurt Boonen
bioRxiv 2023.07.17.549401; doi: https://doi.org/10.1101/2023.07.17.549401
+ + **Examples** + +
+ Python using GRPC + + ```python + import numpy as np + import time + import tritonclient.grpc as grpcclient + + nptype_convert = { + np.dtype('float32'): 'FP32', + np.dtype('O'): 'BYTES', + np.dtype('int16'): 'INT16', + np.dtype('int32'): 'INT32', + np.dtype('int64'): 'INT64', + } + + server_url = 'koina.proteomicsdb.org:443' + model_name = 'Prosit_2023_intensity_TOF' + batch_size = 1000 + inputs = { + 'peptide_sequences': np.array(["AAAAAKAK", "AAAAAKAK"], dtype=np.dtype("O")).reshape([2,1]), + 'precursor_charges': np.array([1,2], dtype=np.dtype("int32")).reshape([2,1]), + 'collision_energies': np.array([25, 25], dtype=np.dtype("float32")).reshape([2,1]), + } + outputs = [ 'intensities', 'mz', 'annotation', ] + + triton_client = grpcclient.InferenceServerClient(url=server_url, ssl=True) + + koina_outputs = [] + for name in outputs: + koina_outputs.append(grpcclient.InferRequestedOutput(name)) + + predictions = {name: [] for name in outputs} + len_inputs = list(inputs.values())[0].shape[0] + for i in range(0, len_inputs, batch_size): + if len_inputs < i+batch_size: + current_batchsize = len_inputs + else: + current_batchsize = batch_size + + koina_inputs = [] + for iname, iarr in inputs.items(): + koina_inputs.append( + grpcclient.InferInput(iname, [current_batchsize, 1], nptype_convert[iarr.dtype]) + ) + koina_inputs[-1].set_data_from_numpy(iarr[i:i+current_batchsize]) + + prediction = triton_client.infer(model_name, inputs=koina_inputs, outputs=koina_outputs) + + for name in outputs: + predictions[name].append(prediction.as_numpy(name)) + + for key, value in predictions.items(): + predictions[key] = np.vstack(value) + print(key) + print(predictions[key]) + ``` + +
+ operationId: Prosit_2023_intensity_TOF + requestBody: + description: Body + content: + application/json: + schema: + $ref: '#/components/schemas/Prosit_2023_intensity_TOF_input' + required: true + responses: + '200': + description: Successful operation + '405': + description: Invalid input + /Prosit_2020_irt_TMT/infer: post: tags: - Retention Time summary: Prosit_2020_irt_TMT description: | + **Summary** + Find out more about this model here.

If you use predictions generated by this model please cite the following paper.

Wassim Gabriel, Matthew The, Daniel P. Zolg, Florian P. Bayer, Omar Shouman, Ludwig Lautenbacher, Karsten Schnatbaum, Johannes Zerweck, Tobias Knaute, Bernard Delanghe, Andreas Huhmer, Holger Wenschuh, Ulf Reimer, Guillaume Médard, Bernhard Kuster, and Mathias Wilhelm
Analytical Chemistry 2022 94 (20), 7181-7190
DOI: 10.1021/acs.analchem.1c05435
+ + **Citation** + + + + **Examples** + +
+ Python using GRPC + ```python import numpy as np import time @@ -908,7 +1086,8 @@ paths: print(key) print(predictions[key]) ``` - " + +
operationId: Prosit_2020_irt_TMT requestBody: description: Body @@ -1680,6 +1859,97 @@ components: type: + Prosit_2023_intensity_TOF_input: + type: object + properties: + id: + type: string + example: "0" + inputs: + example: + + - name: peptide_sequences + shape: [2,1] + datatype: BYTES + data: ["AAAAAKAK", "AAAAAKAK"] + + - name: precursor_charges + shape: [2,1] + datatype: INT32 + data: [1,2] + + - name: collision_energies + shape: [2,1] + datatype: FP32 + data: [25, 25] + + type: array + items: + oneOf: + + - $ref: "#/components/schemas/Prosit_2023_intensity_TOF_input_peptide_sequences" + + - $ref: "#/components/schemas/Prosit_2023_intensity_TOF_input_precursor_charges" + + - $ref: "#/components/schemas/Prosit_2023_intensity_TOF_input_collision_energies" + + minItems: 3 + maxItems: 3 + uniqueItems: true + + + Prosit_2023_intensity_TOF_input_peptide_sequences: + type: object + properties: + name: + type: string + shape: + type: array + items: + type: integer + datatype: + type: string + data: + type: array + example: ["AAAAAKAK", "AAAAAKAK"] + items: + type: + + Prosit_2023_intensity_TOF_input_precursor_charges: + type: object + properties: + name: + type: string + shape: + type: array + items: + type: integer + datatype: + type: string + data: + type: array + example: [1,2] + items: + type: + + Prosit_2023_intensity_TOF_input_collision_energies: + type: object + properties: + name: + type: string + shape: + type: array + items: + type: integer + datatype: + type: string + data: + type: array + example: [25, 25] + items: + type: + + Prosit_2020_irt_TMT_input: type: object properties: