Skip to content

Commit

Permalink
Merge pull request #127 from ErBarb/erald/feature/NMS2
Browse files Browse the repository at this point in the history
Prosit_XL_NMS2
  • Loading branch information
LLautenbacher authored Oct 11, 2024
2 parents 73aed2f + 98de77f commit 5319252
Show file tree
Hide file tree
Showing 28 changed files with 1,220 additions and 0 deletions.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
23 changes: 23 additions & 0 deletions clients/python/test/Prosit/test_Prosit_2024_intensity_XL_NMS2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from test.server_config import SERVER_GRPC, SERVER_HTTP
from pathlib import Path
from test.lib import (
lib_test_available_grpc,
lib_test_available_http,
lib_test_inference,
)


# The model under test is derived from this file's own name, so the file must
# be called test_<MODEL_NAME>.py for the tests below to target the right model.
# NOTE: str.replace removes every occurrence of "test_" in the stem, not only
# the leading prefix.
MODEL_NAME = Path(__file__).stem.replace("test_", "")


def test_available_http():
    """Run the shared `lib_test_available_http` check for MODEL_NAME against SERVER_HTTP."""
    model, server = MODEL_NAME, SERVER_HTTP
    lib_test_available_http(model, server)


def test_available_grpc():
    """Run the shared `lib_test_available_grpc` check for MODEL_NAME against SERVER_GRPC."""
    model, server = MODEL_NAME, SERVER_GRPC
    lib_test_available_grpc(model, server)


def test_inference():
    """Run the shared `lib_test_inference` check for MODEL_NAME against SERVER_GRPC.

    The helper is called with ``atol=1e-4``.
    """
    tolerance = 1e-4
    lib_test_inference(MODEL_NAME, SERVER_GRPC, atol=tolerance)
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from test.server_config import SERVER_GRPC, SERVER_HTTP
from pathlib import Path
from test.lib import (
lib_test_available_grpc,
lib_test_available_http,
lib_test_inference,
)


# The model under test is derived from this file's own name, so the file must
# be called test_<MODEL_NAME>.py for the tests below to target the right model.
# NOTE: str.replace removes every occurrence of "test_" in the stem, not only
# the leading prefix.
MODEL_NAME = Path(__file__).stem.replace("test_", "")


def test_available_http():
    """Run the shared `lib_test_available_http` check for MODEL_NAME against SERVER_HTTP."""
    model, server = MODEL_NAME, SERVER_HTTP
    lib_test_available_http(model, server)


def test_available_grpc():
    """Run the shared `lib_test_available_grpc` check for MODEL_NAME against SERVER_GRPC."""
    model, server = MODEL_NAME, SERVER_GRPC
    lib_test_available_grpc(model, server)


def test_inference():
    """Run the shared `lib_test_inference` check for MODEL_NAME against SERVER_GRPC.

    The helper is called with ``atol=1e-4``.
    """
    tolerance = 1e-4
    lib_test_inference(MODEL_NAME, SERVER_GRPC, atol=tolerance)
Empty file.
158 changes: 158 additions & 0 deletions models/Prosit/Prosit_2024_intensity_XL_NMS2/config.pbtxt
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# Ensemble model: requests are routed through the steps listed under
# ensemble_scheduling rather than executed by a single backend.
max_batch_size: 1000
platform: "ensemble"
# Request inputs: two peptide-sequence string tensors (one per peptide of the
# pair), one INT32 precursor charge and one FP32 collision energy.
# The sequence inputs use dims [-1]; the scalar inputs use dims [1].
input [
{
name: 'peptide_sequences_1',
data_type: TYPE_STRING,
dims: [-1]
},
{
name: 'peptide_sequences_2',
data_type: TYPE_STRING,
dims: [-1]
},
{
name: 'precursor_charges',
data_type: TYPE_INT32,
dims: [1],
},
{
name: 'collision_energies',
data_type: TYPE_FP32,
dims: [1],
}
]
# Response outputs: three tensors with dims [174] each -- FP32 intensities,
# FP32 m/z values and string annotations.
output [
{
name: 'intensities',
data_type: TYPE_FP32,
dims: [174]
},
{
name: 'mz',
data_type: TYPE_FP32,
dims: [174]
},
{
name: 'annotation',
data_type: TYPE_STRING,
dims: [174]
}
]

# Pipeline wiring. In every step, an input_map/output_map `key` is the tensor
# name on the composing model and `value` is the ensemble-level tensor name
# that connects steps to each other and to the ensemble inputs/outputs.
ensemble_scheduling {
step [
# Charge preprocessing: ensemble input precursor_charges ->
# precursor_charges_in_preprocessed:0.
{
model_name: "Prosit_Preprocess_charge"
model_version: 1
input_map {
key: "precursor_charges"
value: "precursor_charges"
},
output_map {
key: "precursor_charges_in:0"
value: "precursor_charges_in_preprocessed:0"
}
},
# Peptide preprocessing: both raw sequence inputs -> peptides_in_1:0 and
# peptides_in_2:0.
{
model_name: "XL_Prosit_Preprocess_peptide_NMS2"
model_version: 1
input_map {
key: "peptide_sequences_1"
value: "peptide_sequences_1"
},
input_map {
key: "peptide_sequences_2"
value: "peptide_sequences_2"
}
output_map {
key: "peptides_in_1:0"
value: "peptides_in_1:0"
},
output_map {
key: "peptides_in_2:0"
value: "peptides_in_2:0"
}
},
# Collision-energy preprocessing: ensemble input collision_energies ->
# norm_collision_energy.
{
model_name: "Prosit_Preprocess_collision_energy"
model_version: 1
input_map {
key: "raw_collision_energy"
value: "collision_energies"
},
output_map {
key: "norm_collision_energy"
value: "norm_collision_energy"
}
},
# Core model: consumes the three preprocessed tensors above (two peptides,
# collision energy, charge) and emits out/Reshape:0.
{
model_name: "Prosit_2024_intensity_XL_NMS2_core"
model_version: 1
input_map {
key: "peptides_in1"
value: "peptides_in_1:0"
},
input_map {
key: "peptides_in2"
value: "peptides_in_2:0"
},
input_map {
key: "collision_energy_in"
value: "norm_collision_energy"
},
input_map {
key: "precursor_charge_in"
value: "precursor_charges_in_preprocessed:0"
}
output_map {
key: "out_1"
value: "out/Reshape:0"
}
},
# Postprocessing: produces the ensemble outputs intensities and mz from the
# core output and the preprocessed charges.
# NOTE(review): the peptides_in_1:0 / peptides_in_2:0 inputs of this step are
# fed from the raw ensemble inputs peptide_sequences_1/2, not from the
# preprocessed peptides_in_*:0 tensors -- confirm this is intended.
{
model_name: "XL_Prosit_NMS2_2024_intensity_postprocess"
model_version: 1
input_map {
key: "peptides_in_1:0"
value: "peptide_sequences_1"
},
input_map {
key: "peptides_in_2:0"
value: "peptide_sequences_2"
},
input_map{
key: "precursor_charges_in:0"
value: "precursor_charges_in_preprocessed:0"
}
input_map{
key: "peaks_in:0",
value: "out/Reshape:0"
}
output_map {
key: "intensities"
value: "intensities"
}
output_map {
key: "mz"
value: "mz"
}
},
# Annotation helper: produces the ensemble output annotation from the raw
# precursor charges and peptide_sequences_1.
# NOTE(review): peptide_sequences_2 is not passed to this step -- confirm the
# helper does not need the second peptide.
{
model_name: "XL_Prosit_NMS2_Helper_annotation"
model_version: 1
input_map {
key: "precursor_charges"
value: "precursor_charges"
},
input_map {
key: "peptide_sequences_1"
value: "peptide_sequences_1"
}
output_map {
key: "annotation"
value: "annotation"
}
}
]
}
37 changes: 37 additions & 0 deletions models/Prosit/Prosit_2024_intensity_XL_NMS2/notes.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
description: |
The HCD Prosit 2020 model was fine-tuned using 251,449 sets of cross-linked peptides, specifically those created from DSSO and DSBU.
The model's structure was adjusted to accommodate two input peptides and generate both non-xl and xl fragments in the output.
The dataset was divided into three parts: training (80% - 198,294 sets), validation (10% - 26,610 sets), and test (10% - 26,545 sets).
This model achieved a SA (a measure of accuracy) of at least 0.83, which is a significant improvement compared to the HCD Prosit 2020 model's performance on cross-linked peptides, where the accuracy was SA ≥ 0.58.
tag: "Intensity"
tag_url: "https://koina.wilhelmlab.org/"
examples:
inputs:
[
{
"name": "peptide_sequences_1",
"httpdtype": "BYTES",
"shape": "[2,1]",
"data": '["DIADAVTAAGVEVAK[UNIMOD:1896]SEVR", "AGDQIQSGVDAAIK[UNIMOD:1896]PGNTLPMR"]'
},
{
"name": "peptide_sequences_2",
"httpdtype": "BYTES",
"shape": "[2,1]",
"data": '["NFLVPQGK[UNIMOD:1896]AVPATK", "SANIALVLYK[UNIMOD:1896]DGER"]'
},
{
"name": "precursor_charges",
"httpdtype": "INT32",
"shape": "[2,1]",
"data": '[3,4]'
},
{
"name": "collision_energies",
"httpdtype": "FP32",
"shape": "[2,1]",
"data": '[28, 28]'
},
]
2 changes: 2 additions & 0 deletions models/Prosit/Prosit_2024_intensity_XL_NMS2_core/1/.zenodo
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
https://zenodo.org/records/11259344/files/model.savedmodel.zip?download=1
md5:5e65b1e60013d94c1e957d62ad601b03
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
max_batch_size: 1000
Loading

0 comments on commit 5319252

Please sign in to comment.