diff --git a/README.md b/README.md index a5de6b1..883a5c5 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ The scoring function is built-up from components, which together define the "com * `ROTATABLE BONDS`: Physico-chemical property calculated by `RDKit`[link](https://www.rdkit.org/docs/Cookbook.html#contiguous-rotable-bonds). * `NUMBER OF HYDROGEN BOND DONORS`: Physico-chemical property calculated by `RDKit`[link](https://www.rdkit.org/docs/source/rdkit.Chem.Lipinski.html). * `NUMBER OF HYDROGEN BOND ACCEPTORS`: Physico-chemical property calculated by `RDKit`[link](https://www.rdkit.org/docs/source/rdkit.Chem.Lipinski.html). +* `SLOGP`: Atom-based calculation of LogP using Crippen’s approach as implemented in `RDKit`[link](https://www.rdkit.org/docs/source/rdkit.Chem.Crippen.html). * `NUMBER OF RINGS`: Physico-chemical property calculated by `RDKit`[link](https://www.rdkit.org/docs/source/rdkit.Chem.rdMolDescriptors.html). * `SELECTIVITY`: If the aim is to optimize activity against one target while reducing activity agains another, i.e. to increase compound's selectivity, this component can be used. Uses two [scikit-learn](https://scikit-learn.org/stable/) models. Works with both classification and regression models. One model is predicting the target activity and the other is providing an off-target prediction. The score is reflecting a user defined activity gap between the target and the off-target predictions. 
diff --git a/data/examples/templates/reinforcement_learning_all_components.json b/data/examples/templates/reinforcement_learning_all_components.json index d50357c..769cdd7 100644 --- a/data/examples/templates/reinforcement_learning_all_components.json +++ b/data/examples/templates/reinforcement_learning_all_components.json @@ -210,6 +210,22 @@ "transformation": true } }, + { + "component_type": "slogp", + "name": "SLogP", + "weight": 1, + "model_path": null, + "smiles": [], + "specific_parameters": { + "transformation_type": "double_sigmoid", + "high": 3, + "low": 1, + "coef_div": 3, + "coef_si": 10, + "coef_se": 10, + "transformation": true + } + }, { "component_type": "num_rings", "name": "Number of rings", diff --git a/scoring/score_components/physchem/__init__.py b/scoring/score_components/physchem/__init__.py index bb5e625..4265a0e 100644 --- a/scoring/score_components/physchem/__init__.py +++ b/scoring/score_components/physchem/__init__.py @@ -4,3 +4,4 @@ from scoring.score_components.physchem.hbd import HBD_Lipinski from scoring.score_components.physchem.num_rings import NumRings from scoring.score_components.physchem.hba import HBA_Lipinski +from scoring.score_components.physchem.slogp import SlogP diff --git a/scoring/score_components/physchem/slogp.py b/scoring/score_components/physchem/slogp.py new file mode 100644 index 0000000..6f382f9 --- /dev/null +++ b/scoring/score_components/physchem/slogp.py @@ -0,0 +1,11 @@ +from rdkit.Chem.Descriptors import MolLogP +from scoring.component_parameters import ComponentParameters +from scoring.score_components.physchem.base_physchem_component import BasePhysChemComponent + + +class SlogP(BasePhysChemComponent): # physchem score component whose property value is RDKit's MolLogP + def __init__(self, parameters: ComponentParameters): # adds no state of its own; setup is delegated to BasePhysChemComponent + super().__init__(parameters) + + def _calculate_phys_chem_property(self, mol): # mol is handed straight to MolLogP; presumably an RDKit Mol built by the base class (TODO confirm) + return MolLogP(mol) diff --git a/scoring/score_components/score_component_factory.py b/scoring/score_components/score_component_factory.py index 648341a..534b040 100644 
--- a/scoring/score_components/score_component_factory.py +++ b/scoring/score_components/score_component_factory.py @@ -4,7 +4,7 @@ from scoring.score_components import TanimotoSimilarity, \ JaccardDistance, CustomAlerts, QedScore, MatchingSubstructure, \ PredictivePropertyComponent, SelectivityComponent, \ - SASComponent, MolWeight, PSA, RotatableBonds, HBD_Lipinski, NumRings, HBA_Lipinski + SASComponent, MolWeight, PSA, RotatableBonds, HBD_Lipinski, NumRings, HBA_Lipinski, SlogP from scoring.score_components.base_score_component import BaseScoreComponent from utils.enums.scoring_function_component_enum import ScoringFunctionComponentNameEnum @@ -28,6 +28,7 @@ def _default_scoring_component_registry(self) -> dict: enum.NUM_ROTATABLE_BONDS: RotatableBonds, enum.NUM_HBD_LIPINSKI: HBD_Lipinski, enum.NUM_HBA_LIPINSKI: HBA_Lipinski, + enum.SLOGP: SlogP, enum.NUM_RINGS: NumRings, enum.SELECTIVITY: SelectivityComponent, enum.SA_SCORE: SASComponent diff --git a/unittest_reinvent/scoring_tests/physchem/__init__.py b/unittest_reinvent/scoring_tests/physchem/__init__.py index 5aab095..85ee97e 100644 --- a/unittest_reinvent/scoring_tests/physchem/__init__.py +++ b/unittest_reinvent/scoring_tests/physchem/__init__.py @@ -4,3 +4,4 @@ from unittest_reinvent.scoring_tests.physchem.test_hbd_lipinski import * from unittest_reinvent.scoring_tests.physchem.test_hba_lipinski import * from unittest_reinvent.scoring_tests.physchem.test_num_rings import * +from unittest_reinvent.scoring_tests.physchem.test_slogp_score import * diff --git a/unittest_reinvent/scoring_tests/physchem/test_slogp_score.py b/unittest_reinvent/scoring_tests/physchem/test_slogp_score.py new file mode 100644 index 0000000..779f4c3 --- /dev/null +++ b/unittest_reinvent/scoring_tests/physchem/test_slogp_score.py @@ -0,0 +1,77 @@ +import unittest + +import numpy as np +import numpy.testing as npt + +from scoring.component_parameters import ComponentParameters +from scoring.function import CustomSum +from 
utils.enums.component_specific_parameters_enum import ComponentSpecificParametersEnum +from utils.enums.scoring_function_component_enum import ScoringFunctionComponentNameEnum +from utils.enums.transformation_type_enum import TransformationTypeEnum + + +class Test_slogp_score_no_transformation(unittest.TestCase): # SLOGP component scored with the transformation switched off + + @classmethod + def setUpClass(cls): # runs once per class: builds a CustomSum holding a single SLOGP component + sf_enum = ScoringFunctionComponentNameEnum() + csp_enum = ComponentSpecificParametersEnum() + ts_parameters = ComponentParameters(component_type=sf_enum.SLOGP, + name="SlogP", + weight=1., + smiles=[], + model_path="", + specific_parameters={ + csp_enum.TRANSFORMATION: False + }) + cls.sf_state = CustomSum(parameters=[ts_parameters]) + + + def test_slogp_1(self): + smiles = [ + "OC(=O)P(=O)(O)O", + "Cc1ccccc1N1C(=O)c2cc(S(N)(=O)=O)c(Cl)cc2NC1C", + "N12CC3C(=NC4C(C=3)=CC=CC=4)C1=CC1=C(COC(=O)C1(O)CC)C2=O", + "N12CC3C(=NC4C(C=3C(=O)O)=CC3=C(OCCO3)C=4)C1=CC1=C(COC(=O)C1(O)CC)C2=O", + "FC1C=CC(CC(=NS(=O)(=O)C2C=CC(C)=CC=2)N2CCN(CC3C4C(=CC=CC=4)N=C4C=3CN3C4=CC4=C(COC(=O)C4(O)CC)C3=O)CC2)=CC=1" + ] + values = np.array([-0.1579, 2.71412, 2.0796, 1.549, 4.67482]) # expected total_score per SMILES, same order as the list above; checked to 2 decimal places + score = self.sf_state.get_final_score(smiles=smiles) + npt.assert_array_almost_equal(score.total_score, values, 2) + +class Test_slogp_score_with_double_sigmoid(unittest.TestCase): # SLOGP component scored through a double-sigmoid transformation (LOW=1, HIGH=3) + + @classmethod + def setUpClass(cls): # runs once per class: builds a CustomSum holding a single transformed SLOGP component + sf_enum = ScoringFunctionComponentNameEnum() + csp_enum = ComponentSpecificParametersEnum() + tt_enum = TransformationTypeEnum() + specific_parameters = { + csp_enum.TRANSFORMATION: True, + csp_enum.LOW: 1, + csp_enum.HIGH: 3, + csp_enum.COEF_DIV: 3, + csp_enum.COEF_SI: 10, + csp_enum.COEF_SE: 10, + csp_enum.TRANSFORMATION_TYPE: tt_enum.DOUBLE_SIGMOID + } + ts_parameters = ComponentParameters(component_type=sf_enum.SLOGP, + name="SlogP", + weight=1., + smiles=[], + model_path="", + specific_parameters=specific_parameters + ) + cls.sf_state = CustomSum(parameters=[ts_parameters]) + + def test_slogp_1(self): + smiles = [ + "OC(=O)P(=O)(O)O", 
+ "Cc1ccccc1N1C(=O)c2cc(S(N)(=O)=O)c(Cl)cc2NC1C", + "N12CC3C(=NC4C(C=3)=CC=CC=4)C1=CC1=C(COC(=O)C1(O)CC)C2=O", + "N12CC3C(=NC4C(C=3C(=O)O)=CC3=C(OCCO3)C=4)C1=CC1=C(COC(=O)C1(O)CC)C2=O", + "FC1C=CC(CC(=NS(=O)(=O)C2C=CC(C)=CC=2)N2CCN(CC3C4C(=CC=CC=4)N=C4C=3CN3C4=CC4=C(COC(=O)C4(O)CC)C3=O)CC2)=CC=1" + ] + values = np.array([0.0, 0.9, 1.0, 1.0, 0.0]) # expected total_score per SMILES after the transformation, same order as the list above; checked to 2 decimal places + score = self.sf_state.get_final_score(smiles=smiles) + npt.assert_array_almost_equal(score.total_score, values, 2) diff --git a/utils/enums/scoring_function_component_enum.py b/utils/enums/scoring_function_component_enum.py index f53f4fa..12fe1b2 100644 --- a/utils/enums/scoring_function_component_enum.py +++ b/utils/enums/scoring_function_component_enum.py @@ -17,6 +17,7 @@ class ScoringFunctionComponentNameEnum(): __NUM_HBA_LIPINSKI = "num_hba_lipinski" __NUM_RINGS = "num_rings" __TPSA = "tpsa" + __SLOGP = "slogp" __TOTAL_SCORE = "total_score" # there is no actual component corresponding to this type __AZ_LOGD74 = "az_logd74" __HLM_CLINT = "hlm_clint" @@ -117,6 +118,14 @@ def QED_SCORE(self): def QED_SCORE(self, value): raise ValueError("Do not assign value to a ScoringFunctionComponentNameEnum field") + @property + def SLOGP(self): + return self.__SLOGP + + @SLOGP.setter + def SLOGP(self, value): + raise ValueError("Do not assign value to a ScoringFunctionComponentNameEnum field") + @property def MOLECULAR_WEIGHT(self): return self.__MOLECULAR_WEIGHT