Skip to content

Commit

Permalink
New Gaussian class changes adapted to SVM models.
Browse files Browse the repository at this point in the history
- The latest version of the Gaussian class is now compatible with SVM
  models.
- ml4chem/data/handler.py now makes the `atoms_per_image` attribute
  available for both training and inference.
- The get_chunks() function in ml4chem/utils.py should now work for both
  SVM and non-SVM models.
  • Loading branch information
muammar committed Nov 27, 2019
1 parent b32c249 commit cebb391
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 15 deletions.
4 changes: 2 additions & 2 deletions ml4chem/data/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ def prepare_images(self, images, purpose=None):
"""
logger.info("Preparing images for {}...".format(purpose))
self.images = OrderedDict()

self.atoms_per_image = []

if purpose == "training":
self.targets = []
self.atoms_per_image = []

duplicates = 0

Expand Down
22 changes: 10 additions & 12 deletions ml4chem/features/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,16 +375,17 @@ def calculate(self, images=None, purpose="training", data=None, svm=False):
del stacked_features

# Restack images
feature_space = []

if svm:
computations = []
reference_space = []

for i, image in enumerate(images.items()):
computations.append(
self.restack_image(
i, image, scaled_feature_space=scaled_feature_space, svm=svm
)
restacked = client.submit(
self.restack_image,
*(i, image, None, scaled_feature_space, svm)
)
feature_space.append(restacked)

# image = (hash, ase_image) -> tuple
for atom in image[1]:
Expand All @@ -396,7 +397,6 @@ def calculate(self, images=None, purpose="training", data=None, svm=False):
*reference_space, scheduler=self.scheduler
)
else:
feature_space = []
try:
for i, image in enumerate(images.items()):
restacked = client.submit(
Expand All @@ -408,16 +408,14 @@ def calculate(self, images=None, purpose="training", data=None, svm=False):
except UnboundLocalError:
# scaled_feature_space does not exist.
for i, image in enumerate(images.items()):
computations.append(
self.restack_image(
i, image, feature_space=feature_space, svm=svm
)
restacked = client.submit(
self.restack_image,
*(i, image, feature_space, None, svm)
)
feature_space.append(restacked)

feature_space = client.gather(feature_space)
feature_space = OrderedDict(feature_space)
# feature_space = dask.compute(*computations, scheduler=self.scheduler)
# del computations

preprocessor.save_to_file(preprocessor, self.save_preprocessor)

Expand Down
3 changes: 2 additions & 1 deletion ml4chem/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ def get_chunks(sequence, chunk_size, svm=True):
"""
res = []

if svm is False and isinstance(sequence, dict):
#if svm is False and isinstance(sequence, dict):
if isinstance(sequence, dict):
sequence = sequence.items()

for item in sequence:
Expand Down

0 comments on commit cebb391

Please sign in to comment.