Merge pull request #122 from wilhelm-lab/feature/prosit_cit

Feature/prosit cit
wilhelm-lab · Oct 8, 2024 · 501bd9e · 501bd9e
2 parents bfab1fb + 2c06f5e
commit 501bd9e
Show file tree

Hide file tree

Showing 25 changed files with 747 additions and 0 deletions.
diff --git a/clients/python/test/Prosit/arr_Prosit_2024_intensity_cit_int.npy b/clients/python/test/Prosit/arr_Prosit_2024_intensity_cit_int.npy
diff --git a/clients/python/test/Prosit/arr_Prosit_2024_intensity_cit_int_raw.npy b/clients/python/test/Prosit/arr_Prosit_2024_intensity_cit_int_raw.npy
diff --git a/clients/python/test/Prosit/arr_Prosit_2024_irt_cit_irt.npy b/clients/python/test/Prosit/arr_Prosit_2024_irt_cit_irt.npy
diff --git a/clients/python/test/Prosit/arr_Prosit_2024_irt_cit_irt_raw.npy b/clients/python/test/Prosit/arr_Prosit_2024_irt_cit_irt_raw.npy
diff --git a/clients/python/test/Prosit/test_Prosit_2024_intensity_cit.py b/clients/python/test/Prosit/test_Prosit_2024_intensity_cit.py
@@ -0,0 +1,77 @@
+from test.server_config import SERVER_GRPC, SERVER_HTTP
+import tritonclient.grpc as grpcclient
+import numpy as np
+from pathlib import Path
+import requests
+
+# The model under test is named after this file: test_<MODEL_NAME>.py
+MODEL_NAME = Path(__file__).stem.replace("test_", "", 1)
+
+
+def test_available_http():
+    response = requests.get(f"{SERVER_HTTP}/v2/models/{MODEL_NAME}", timeout=1)
+    assert response.status_code == 200  # the server must answer the model lookup
+
+
+def test_available_grpc():
+    with grpcclient.InferenceServerClient(url=SERVER_GRPC) as triton_client:
+        assert triton_client.is_model_ready(MODEL_NAME)  # channel closes on exit
+
+
+def test_inference():
+    """Infer over gRPC and compare the intensities with the stored reference array."""
+    sequences = np.array(
+        [
+            ["AA"],
+            ["PEPTIPEPTIR[UNIMOD:7]EPTIPEPTIPEPTIPEPT"],
+            ["HKDER[UNIMOD:7]STNQCGAVILMFYW"],
+            ["R[UNIMOD:7]HKDESTNQC[UNIMOD:4]GPAVILMFYW"],
+            ["R[UNIMOD:7]HKDESTNQCGPAVILM[UNIMOD:35]FYW"],
+        ],
+        dtype=np.object_,
+    )
+    per_peptide = (len(sequences), 1)  # one charge, energy and method per peptide
+    charge = np.full(per_peptide, 3, dtype=np.int32)
+    ces = np.full(per_peptide, 30, dtype=np.float32)
+    frag = np.full(per_peptide, "HCD", dtype=np.object_)
+
+    in_pep_seq = grpcclient.InferInput("peptide_sequences", sequences.shape, "BYTES")
+    in_pep_seq.set_data_from_numpy(sequences)
+
+    in_charge = grpcclient.InferInput("precursor_charges", charge.shape, "INT32")
+    in_charge.set_data_from_numpy(charge)
+
+    in_ces = grpcclient.InferInput("collision_energies", ces.shape, "FP32")
+    in_ces.set_data_from_numpy(ces)
+
+    in_frag = grpcclient.InferInput("fragmentation_types", frag.shape, "BYTES")
+    in_frag.set_data_from_numpy(frag)
+
+    # Leaving the with-block closes the gRPC channel.
+    with grpcclient.InferenceServerClient(url=SERVER_GRPC) as triton_client:
+        result = triton_client.infer(
+            MODEL_NAME,
+            inputs=[in_pep_seq, in_charge, in_ces, in_frag],
+            outputs=[
+                grpcclient.InferRequestedOutput("intensities"),
+                grpcclient.InferRequestedOutput("mz"),
+                grpcclient.InferRequestedOutput("annotation"),
+            ],
+        )
+
+    intensities = result.as_numpy("intensities")
+    fragmentmz = result.as_numpy("mz")
+    annotation = result.as_numpy("annotation")
+
+    assert intensities.shape == (len(sequences), 174)
+    assert fragmentmz.shape == (len(sequences), 174)
+    assert annotation.shape == (len(sequences), 174)
+
+    # Assert intensities consistent
+    assert np.allclose(
+        intensities,
+        np.load("test/Prosit/arr_Prosit_2024_intensity_cit_int.npy"),
+        rtol=0,
+        atol=1e-5,
+        equal_nan=True,
+    )
diff --git a/clients/python/test/Prosit/test_Prosit_2024_intensity_cit_core.py b/clients/python/test/Prosit/test_Prosit_2024_intensity_cit_core.py
@@ -0,0 +1,57 @@
+from test.server_config import SERVER_GRPC, SERVER_HTTP
+import tritonclient.grpc as grpcclient
+import numpy as np
+from pathlib import Path
+import requests
+
+# The model under test is named after this file: test_<MODEL_NAME>.py
+MODEL_NAME = Path(__file__).stem.replace("test_", "", 1)
+
+
+def test_available_http():
+    response = requests.get(f"{SERVER_HTTP}/v2/models/{MODEL_NAME}", timeout=1)
+    assert response.status_code == 200  # the server must answer the model lookup
+
+
+def test_available_grpc():
+    with grpcclient.InferenceServerClient(url=SERVER_GRPC) as triton_client:
+        assert triton_client.is_model_ready(MODEL_NAME)  # channel closes on exit
+
+
+def test_inference():
+    """Send the stored input arrays to the core model and check output_1."""
+    seq = np.load("test/Prosit/arr_Prosit_2019_intensity_seq.npy")
+    charge = np.load("test/Prosit/arr_Prosit_2019_intensity_charge.npy")
+    ces = np.load("test/Prosit/arr_Prosit_2019_intensity_ces.npy")
+    # -1 lets numpy size the column instead of hard-coding the fixture length.
+    frag = np.load("test/Prosit/arr_Prosit_2020_intensityTMT_frag.npy").reshape(-1, 1)
+
+    in_pep_seq = grpcclient.InferInput("modified_sequence", seq.shape, "INT64")
+    in_pep_seq.set_data_from_numpy(seq.astype(np.int64))
+
+    in_charge = grpcclient.InferInput("precursor_charge_onehot", charge.shape, "INT64")
+    in_charge.set_data_from_numpy(charge.astype(np.int64))
+
+    in_ces = grpcclient.InferInput("aligned_collision_energy", ces.shape, "FP32")
+    in_ces.set_data_from_numpy(ces)
+
+    in_frag = grpcclient.InferInput("method_nbr", frag.shape, "INT64")
+    in_frag.set_data_from_numpy(frag.astype(np.int64))
+
+    # Leaving the with-block closes the gRPC channel.
+    with grpcclient.InferenceServerClient(url=SERVER_GRPC) as triton_client:
+        result = triton_client.infer(
+            MODEL_NAME,
+            inputs=[in_pep_seq, in_charge, in_ces, in_frag],
+            outputs=[grpcclient.InferRequestedOutput("output_1")],
+        )
+
+    intensities = result.as_numpy("output_1")
+
+    assert intensities.shape == (len(seq), 174)
+    assert np.allclose(
+        intensities,
+        np.load("test/Prosit/arr_Prosit_2024_intensity_cit_int_raw.npy"),
+        rtol=0,
+        atol=1e-4,
+    )
diff --git a/clients/python/test/Prosit/test_Prosit_2024_irt_cit.py b/clients/python/test/Prosit/test_Prosit_2024_irt_cit.py
@@ -0,0 +1,56 @@
+from test.server_config import SERVER_GRPC, SERVER_HTTP
+import tritonclient.grpc as grpcclient
+import numpy as np
+from pathlib import Path
+import requests
+
+# The model under test is named after this file: test_<MODEL_NAME>.py
+MODEL_NAME = Path(__file__).stem.replace("test_", "", 1)
+
+
+def test_available_http():
+    response = requests.get(f"{SERVER_HTTP}/v2/models/{MODEL_NAME}", timeout=1)
+    assert response.status_code == 200  # the server must answer the model lookup
+
+
+def test_available_grpc():
+    with grpcclient.InferenceServerClient(url=SERVER_GRPC) as triton_client:
+        assert triton_client.is_model_ready(MODEL_NAME)  # channel closes on exit
+
+
+def test_inference():
+    """Request iRT predictions over gRPC and compare them with the reference."""
+    sequences = np.array(
+        [
+            ["AA"],
+            ["PEPTIPEPTIR[UNIMOD:7]EPTIPEPTIPEPTIPEPT"],
+            ["R[UNIMOD:7]HKDESTNQCGAVILMFYW"],
+            ["R[UNIMOD:7]HKDESTNQC[UNIMOD:4]GPAVILMFYW"],
+            ["R[UNIMOD:7]HKDESTNQCGPAVILM[UNIMOD:35]FYW"],
+        ],
+        dtype=np.object_,
+    )
+
+    in_pep_seq = grpcclient.InferInput("peptide_sequences", sequences.shape, "BYTES")
+    in_pep_seq.set_data_from_numpy(sequences)
+
+    with grpcclient.InferenceServerClient(url=SERVER_GRPC) as triton_client:
+        result = triton_client.infer(
+            MODEL_NAME,
+            inputs=[in_pep_seq],
+            outputs=[
+                grpcclient.InferRequestedOutput("irt"),
+            ],
+        )
+
+    irt = result.as_numpy("irt")
+
+    assert irt.shape == (len(sequences), 1)
+
+    # Assert iRT values consistent
+    assert np.allclose(
+        irt,
+        np.load("test/Prosit/arr_Prosit_2024_irt_cit_irt.npy"),
+        rtol=0,
+        atol=1e-4,
+    )
diff --git a/clients/python/test/Prosit/test_Prosit_2024_irt_cit_core.py b/clients/python/test/Prosit/test_Prosit_2024_irt_cit_core.py
@@ -0,0 +1,46 @@
+from test.server_config import SERVER_GRPC, SERVER_HTTP
+import tritonclient.grpc as grpcclient
+import numpy as np
+from pathlib import Path
+import requests
+
+# The model under test is named after this file: test_<MODEL_NAME>.py
+MODEL_NAME = Path(__file__).stem.replace("test_", "", 1)
+
+
+def test_available_http():
+    response = requests.get(f"{SERVER_HTTP}/v2/models/{MODEL_NAME}", timeout=1)
+    assert response.status_code == 200  # the server must answer the model lookup
+
+
+def test_available_grpc():
+    with grpcclient.InferenceServerClient(url=SERVER_GRPC) as triton_client:
+        assert triton_client.is_model_ready(MODEL_NAME)  # channel closes on exit
+
+
+def test_inference():
+    """Send the stored sequence array to the core model and check output_1."""
+    seq = np.load("test/Prosit/arr_Prosit_2019_intensity_seq.npy")
+
+    in_pep_seq = grpcclient.InferInput("input_1", seq.shape, "INT64")
+    in_pep_seq.set_data_from_numpy(seq.astype(np.int64))
+
+    with grpcclient.InferenceServerClient(url=SERVER_GRPC) as triton_client:
+        result = triton_client.infer(
+            MODEL_NAME,
+            inputs=[in_pep_seq],
+            outputs=[
+                grpcclient.InferRequestedOutput("output_1"),
+            ],
+        )
+
+    irt = result.as_numpy("output_1")
+
+    assert irt.shape == (len(seq), 1)
+    # Assert raw iRT output consistent
+    assert np.allclose(
+        irt,
+        np.load("test/Prosit/arr_Prosit_2024_irt_cit_irt_raw.npy"),
+        rtol=0,
+        atol=1e-4,
+    )
diff --git a/models/Prosit/Prosit_2024_intensity_cit/1/.donotdelete b/models/Prosit/Prosit_2024_intensity_cit/1/.donotdelete
diff --git a/models/Prosit/Prosit_2024_intensity_cit/config.pbtxt b/models/Prosit/Prosit_2024_intensity_cit/config.pbtxt
@@ -0,0 +1,155 @@
+max_batch_size: 1000
+platform: "ensemble"
+input [
+  {
+    name: 'peptide_sequences',
+    data_type: TYPE_STRING,
+    dims: [-1]
+  },
+  {
+    name: 'precursor_charges',
+    data_type: TYPE_INT32,
+    dims: [1],
+  },
+  {
+    name: 'collision_energies',
+    data_type: TYPE_FP32,
+    dims: [1],
+  },
+  {
+    name: 'fragmentation_types',
+    data_type: TYPE_STRING,
+    dims: [1],
+  }
+]
+output [
+  {
+    name: 'intensities',
+    data_type: TYPE_FP32,
+    dims: [174]
+  },
+  {
+    name: 'mz',
+    data_type: TYPE_FP32,
+    dims: [174]
+  },
+  {
+    name: 'annotation',
+    data_type: TYPE_STRING,
+    dims: [174]
+  }
+]
+
+ensemble_scheduling {
+  step [
+    {
+      model_name: "Prosit_Preprocess_charge_cit"  # output feeds precursor_charge_onehot of the core model
+      model_version: 1
+      input_map {
+        key: "precursor_charges"
+        value: "precursor_charges"
+      },
+      output_map {
+        key: "precursor_charges_in:0"
+        value: "precursor_charges_in_preprocessed:0"
+      }
+    },
+    {
+      model_name: "Prosit_Preprocess_charge"  # output feeds the postprocess step
+      model_version: 1
+      input_map {
+        key: "precursor_charges"
+        value: "precursor_charges"
+      },
+      output_map {
+        key: "precursor_charges_in:0"
+        value: "precursor_charges_in_preprocessed_FP"
+      }
+    },
+    {
+      model_name: "Prosit_Preprocess_fragmentation_types_cit"  # output feeds method_nbr of the core model
+      model_version: 1
+      input_map {
+        key: "fragmentation_types"
+        value: "fragmentation_types"
+      },
+      output_map {
+        key: "fragmentation_types_encoding"
+        value: "fragmentation_types_processed:0"
+      }
+    },
+
+    {
+      model_name: "Prosit_Preprocess_peptide_no_termini"  # output feeds modified_sequence of the core model
+      model_version: 1
+      input_map {
+        key: "peptide_sequences"
+        value: "peptide_sequences"
+      },
+      output_map {
+        key: "peptides_in:0"
+        value: "peptides_in:0"
+      }
+    },
+    {
+      model_name: "Prosit_2024_intensity_cit_core"  # produces output_1, consumed by the postprocess step
+      model_version: 1
+      input_map {
+        key: "modified_sequence"
+        value: "peptides_in:0"
+      },
+      input_map {
+        key: "aligned_collision_energy"
+        value: "collision_energies"
+      },
+      input_map {
+        key: "precursor_charge_onehot"
+        value: "precursor_charges_in_preprocessed:0"
+      }
+      input_map {
+        key: "method_nbr"
+        value: "fragmentation_types_processed:0"
+      }
+      output_map {
+        key: "output_1"
+        value: "output_1"
+      }
+    },
+    {
+      model_name: "Prosit_2019_intensity_postprocess"  # produces the ensemble outputs intensities and mz
+      model_version: 1
+      input_map {
+        key: "peptides_in:0"
+        value: "peptide_sequences"
+      },
+      input_map {
+        key: "precursor_charges_in:0"
+        value: "precursor_charges_in_preprocessed_FP"
+      }
+      input_map {
+        key: "peaks_in:0",
+        value: "output_1"
+      }
+      output_map {
+        key: "intensities"
+        value: "intensities"
+      }
+      output_map {
+        key: "mz"
+        value: "mz"
+      }
+    },
+    {
+      model_name: "Prosit_Helper_annotation"  # produces the ensemble output annotation
+      model_version: 1
+      input_map {
+        key: "precursor_charges"
+        value: "precursor_charges"
+      },
+      output_map {
+        key: "annotation"
+        value: "annotation"
+      }
+    }
+  ]
+}