Skip to content

Commit

Permalink
Merge pull request #255 from JacksonBurns/minhashfp-add
Browse files Browse the repository at this point in the history
add the minhash fingerprint
  • Loading branch information
himaghna authored Apr 7, 2023
2 parents 95b2ed7 + 3cb762f commit 4db502d
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 20 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
*.pyc

# build and distribution files
AIMSim.egg-info/*
aimsim.egg-info/*
build/*
dist/*

Expand All @@ -27,4 +27,4 @@ htmlcov/*
.ipynb_checkpoints/*

# Temp files from testing
temp_*
temp_*
21 changes: 6 additions & 15 deletions AIMSim-demo.ipynb

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,10 @@ Start `AIMSim` with a prepared configuration YAML file (`config.yaml`):

_The following are available via command line use (config.yaml) only:_

4. All fingerprints available from the [ccbmlib](https://github.com/vogt-m/ccbmlib) package (_specify 'ccbmlib:descriptorname' for command line input_).
5. All descriptors and fingerprints available from [PaDELPy](https://github.com/ecrl/padelpy), an interface to PaDEL-Descriptor. (_specify 'padelpy:descriptorname' for command line input._).
6. All descriptors available through the [Mordred](https://github.com/mordred-descriptor/mordred) library (_specify 'mordred:descriptorname' for command line input._). To enable this option, you must install with `pip install 'aimsim[mordred]'` (see disclaimer in the Installation section above).
4. MinHash Fingerprint (see [MHFP](https://github.com/reymond-group/mhfp))
5. All fingerprints available from the [ccbmlib](https://github.com/vogt-m/ccbmlib) package (_specify 'ccbmlib:descriptorname' for command line input_).
6. All descriptors and fingerprints available from [PaDELPy](https://github.com/ecrl/padelpy), an interface to PaDEL-Descriptor. (_specify 'padelpy:descriptorname' for command line input._).
7. All descriptors available through the [Mordred](https://github.com/mordred-descriptor/mordred) library (_specify 'mordred:descriptorname' for command line input._). To enable this option, you must install with `pip install 'aimsim[mordred]'` (see disclaimer in the Installation section above).

### Currently Implemented Similarity Scores

Expand Down
35 changes: 35 additions & 0 deletions aimsim/ops/descriptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from aimsim.utils.ccbmlib_fingerprints import generate_fingerprints
from padelpy import from_smiles
from aimsim.utils.extras import requires_mordred
from mhfp.encoder import MHFPEncoder

try:
from mordred import Calculator, descriptors
Expand Down Expand Up @@ -264,6 +265,27 @@ def _set_ccbmlib_fingerprint(self, molecule_graph, descriptor, **kwargs):
self.label_ = descriptor
self.params_ = {}

def _set_minhash_fingerprint(self, molecule_graph, **kwargs):
    """Compute the MinHash fingerprint (MHFP) and store it on this object.

    Args:
        molecule_graph (RDKIT object): Graph of molecule to be fingerprinted.
        **kwargs: Fingerprint settings. Every key listed below is required;
            a missing key raises KeyError.
            n_permutations, seed: passed to the MHFPEncoder constructor.
            radius, rings, kekulize: passed to MHFPEncoder.encode_mol.
    """
    encoder = MHFPEncoder(
        n_permutations=kwargs["n_permutations"], seed=kwargs["seed"]
    )
    encode_options = {
        option: kwargs[option] for option in ("radius", "rings", "kekulize")
    }
    # The encoder output is stored unchanged in the numpy slot.
    self.numpy_ = encoder.encode_mol(molecule_graph, **encode_options)
    self.label_ = "minhash_fingerprint"
    # Keep the complete settings dict (including any extra keys) as params.
    self.params_ = kwargs

def make_fingerprint(
self, molecule_graph, fingerprint_type, fingerprint_params=None
):
Expand All @@ -285,6 +307,18 @@ def make_fingerprint(
morgan_params = {"radius": 3, "n_bits": 1024}
morgan_params.update(fingerprint_params)
self._set_morgan_fingerprint(molecule_graph=molecule_graph, **morgan_params)
elif fingerprint_type == "minhash_fingerprint":
minhash_params = {
"n_permutations": 2048,
"seed": 42,
"radius": 3,
"rings": True,
"kekulize": True,
}
minhash_params.update(fingerprint_params)
self._set_minhash_fingerprint(
molecule_graph=molecule_graph, **minhash_params
)
elif fingerprint_type == "topological_fingerprint":
topological_params = {"min_path": 1, "max_path": 7}
topological_params.update(fingerprint_params)
Expand Down Expand Up @@ -480,6 +514,7 @@ def get_all_supported_descriptors():
"morgan_fingerprint",
"topological_fingerprint",
"daylight_fingerprint",
"minhash_fingerprint",
"maccs_keys",
"atom-pair_fingerprint",
"torsion_fingerprint",
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ padelpy
plotly
customtkinter<5.0.0
tkinter-tooltip
mhfp
26 changes: 26 additions & 0 deletions tests/test_Descriptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,31 @@ def test_padelpy_descriptors(self):
with self.assertRaises(ValueError):
descriptor.to_rdkit()

def test_minhash_fingerprint(self):
    """Verify that a minhash fingerprint descriptor can be created.

    Builds the fingerprint for the SMILES string "CCOCC" and checks that
    the descriptor is initialized, is labelled "minhash_fingerprint",
    converts to a numpy array and refuses conversion to an RDKit object.
    """
    fprint_type = "minhash_fingerprint"
    ether_graph = MolFromSmiles("CCOCC")
    minhash_descriptor = Descriptor()
    minhash_descriptor.make_fingerprint(
        molecule_graph=ether_graph,
        fingerprint_type=fprint_type,
    )

    is_initialized = minhash_descriptor.check_init()
    self.assertTrue(
        is_initialized, "Expected Descriptor object to be initialized"
    )

    label_message = (
        "Expected label of descriptor initialized with "
        "{} to match the fingerprint".format(fprint_type)
    )
    self.assertEqual(
        minhash_descriptor.label_, "minhash_fingerprint", label_message
    )

    as_array = minhash_descriptor.to_numpy()
    self.assertIsInstance(
        as_array, np.ndarray, "Expected numpy.ndarray from to_numpy()"
    )

    # to_rdkit() is expected to raise ValueError for this fingerprint.
    with self.assertRaises(ValueError):
        minhash_descriptor.to_rdkit()

def test_ccbmlib_descriptors(self):
"""Test ability to passthrough descriptors to ccbmlib."""
mol_graph = MolFromSmiles("CCOCC")
Expand Down Expand Up @@ -310,6 +335,7 @@ def test_exptl_descriptors(self):
"maccs_keys",
"atom-pair_fingerprint",
"torsion_fingerprint",
"minhash_fingerprint",
]
for desc in fprint_list:
descriptor = Descriptor()
Expand Down

0 comments on commit 4db502d

Please sign in to comment.