New general improvements.
- atomistic.models.neuralnetwork: New `get_activations` function to get
  the activations of a neural network.
- Fixed the visualization module.
- Updated the install documentation.
- Bumped version for release.
muammar committed Mar 6, 2020
1 parent 85681e9 commit 35b3971
Showing 6 changed files with 107 additions and 18 deletions.
2 changes: 1 addition & 1 deletion bin/ml4chem
@@ -41,7 +41,7 @@ def main(**args):
plot_atomic_features(_file, method=method, backend=backend)

else:
-        raise NotImplementedError
+        raise NotImplementedError(f"Supported values are {training_plots}")

if __name__ == "__main__":
main()
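The rewritten exception now reports the accepted values rather than raising a bare NotImplementedError. A minimal sketch of the dispatch pattern, with a hypothetical `training_plots` tuple (the real values are defined elsewhere in bin/ml4chem):

    # Hypothetical names; the actual supported values live elsewhere in bin/ml4chem.
    training_plots = ("training", "loss")

    def make_plot(plot_type):
        if plot_type in training_plots:
            ...  # dispatch to the matching plotting helper
        else:
            # Naming the supported values makes the failure actionable.
            raise NotImplementedError(f"Supported values are {training_plots}")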
2 changes: 1 addition & 1 deletion docs/source/install.rst
@@ -41,7 +41,7 @@ for ML4chem.
2. Install the requirements::

cd ml4chem
-   pip install -r requirements.txt
+   python3 -m pip install -r requirements.txt

3. After requirements are installed, you can proceed to add ``ml4chem`` to
your ``PYTHONPATH`` and ``PATH`` (to use the ``ml4chem`` command line
2 changes: 1 addition & 1 deletion ml4chem/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.8-dev"
+__version__ = "0.0.8"
106 changes: 95 additions & 11 deletions ml4chem/atomistic/models/neuralnetwork.py
@@ -5,12 +5,14 @@
import torch

import numpy as np
import pandas as pd
from collections import OrderedDict
from ml4chem.metrics import compute_rmse
from ml4chem.atomistic.models.base import DeepLearningModel, DeepLearningTrainer
from ml4chem.atomistic.models.loss import AtomicMSELoss
-from ml4chem.optim.handler import get_optimizer, get_lr_scheduler
+from ml4chem.optim.handler import get_optimizer, get_lr_scheduler, get_lr
from ml4chem.utils import convert_elapsed_time, get_chunks, get_number_of_parameters
from pprint import pformat


# Setting precision and starting logger object
@@ -208,6 +210,81 @@ def forward(self, X):
outputs = torch.stack(outputs)
return outputs

    def get_activations(self, images, model=None, numpy=True):
        """Get the activations of each hidden layer

        This function extracts the activations of each hidden layer of
        the neural network.

        Parameters
        ----------
        images : dict
            Images with structure hash, features.
        model : object
            A ML4Chem model object.
        numpy : bool
            Whether we want numpy arrays or tensors.

        Returns
        -------
        activations : DataFrame
            A DataFrame with the activations of each layer.
        """

        activations = []
        columns = ["Hash", "atom.index", "atom.symbol"]

        if model is None:
            model = self

        model.eval()

        for hash, data in images.items():
            for index, (symbol, features) in enumerate(data):

                counter = 0
                layer_counter = 0
                for l, layer in enumerate(model.linears[symbol].modules()):
                    if isinstance(layer, torch.nn.Linear) and counter == 0:
                        x = layer(features)

                        if numpy:
                            data_ = [hash, index, symbol, x.detach().numpy()]
                        else:
                            data_ = [hash, index, symbol, x]

                        layer_column_name = f"layer{layer_counter}"

                        if layer_column_name not in columns:
                            columns.append(layer_column_name)

                        counter += 1
                        layer_counter += 1

                    elif isinstance(layer, torch.nn.Linear) and counter > 0:
                        x = layer(x)

                        if numpy:
                            data_.append(x.detach().numpy())
                        else:
                            data_.append(x)

                        layer_column_name = f"layer{layer_counter}"
                        if layer_column_name not in columns:
                            columns.append(layer_column_name)

                        counter += 1
                        layer_counter += 1

                activations.append(data_)
                del data_

        # Create a DataFrame from the list of per-atom activations.
        df = pd.DataFrame(activations, columns=columns)

        return df
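A brief usage sketch for `get_activations`, assuming `model` is a trained instance of this class and `features` maps each image hash to a list of (symbol, features) pairs, as the loop above expects; both names are illustrative:

    # Illustrative names; `model` and `features` are assumed to exist already.
    df = model.get_activations(features, numpy=True)

    # One row per atom; the layerN columns hold that atom's hidden-layer outputs.
    print(df[["Hash", "atom.index", "atom.symbol", "layer0"]].head())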


class train(DeepLearningTrainer):
"""Train the model
@@ -291,6 +368,19 @@ def __init__(

self.initial_time = time.time()

if lossfxn is None:
lossfxn = AtomicMSELoss

logger.info("")
logger.info("Training")
logger.info("========")
logger.info(f"Convergence criteria: {convergence}")
logger.info(f"Loss function: {lossfxn.__name__}")
if uncertainty is not None:
logger.info("Options:")
logger.info(f" - Uncertainty penalization: {pformat(uncertainty)}")
logger.info("")

atoms_per_image = data.atoms_per_image

if batch_size is None:
@@ -309,7 +399,7 @@
for u in uncertainty
]

logger.info(" ")
logger.info("")
logging.info("Batch Information")
logging.info("-----------------")
logging.info("Number of batches: {}.".format(len(chunks)))
@@ -362,6 +452,7 @@
self.epochs = epochs
self.model = model
self.lr_scheduler = lr_scheduler
self.lossfxn = lossfxn
self.checkpoint = checkpoint
self.test = test

@@ -375,11 +466,6 @@
else:
self.uncertainty = uncertainty

-        if lossfxn is None:
-            self.lossfxn = AtomicMSELoss
-        else:
-            self.lossfxn = lossfxn

# Let the hunger games begin...
self.trainer()

@@ -388,8 +474,6 @@ def trainer(self):

logger.info(" ")
logger.info("Starting training...\n")
-        if self.uncertainty is not None:
-            logger.info("Loss function will penalize based on uncertainties.\n")

if self.test is None:
logger.info(
@@ -500,7 +584,7 @@
)
rmse_atom_test = client.submit(
compute_rmse,
-                        *(test_predictions, test_targets, atoms_per_image_test)
+                        *(test_predictions, test_targets, atoms_per_image_test),
)

rmse_test = rmse_test.result()
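For reference, a synchronous sketch of what these two futures compute, assuming compute_rmse takes (predictions, targets) with an optional atoms_per_image argument, as the calls above suggest:

    # Sketch without the dask scaffolding; variable names mirror the ones above.
    rmse_test = compute_rmse(test_predictions, test_targets)
    rmse_atom_test = compute_rmse(test_predictions, test_targets, atoms_per_image_test)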
@@ -586,7 +670,7 @@ def closure(
lossfxn,
atoms_per_image,
device,
-                    )
+                    ),
)
)
dask.distributed.wait(accumulation)
4 changes: 2 additions & 2 deletions ml4chem/data/preprocessing.py
@@ -97,10 +97,10 @@ def set(self, purpose):
if purpose == "training" and preprocessor_name is not None:
logger.info("\nData preprocessing")
logger.info("------------------")
logger.info("Preprocessor: {}.".format(preprocessor_name))
logger.info(f"Preprocessor: {preprocessor_name}.")
logger.info("Options:")
for k, v in self.kwargs.items():
logger.info(" - {}: {}.".format(k, v))
logger.info(f" - {k}: {v}.")

logger.info(" ")

9 changes: 7 additions & 2 deletions ml4chem/visualization.py
@@ -128,6 +128,10 @@ def read_log(logfile, metric="loss", refresh=None, data_only=False):
loss.append(float(line[3]))
training.append(float(line[4]))
test.append(float(line[6]))
except IndexError:
epochs.append(int(line[0]))
loss.append(float(line[3]))
training.append(float(line[4]))
except ValueError:
pass
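The new except IndexError branch covers epoch rows written without test-set columns. A self-contained sketch of the same idea that probes the optional column with a length check, assuming the column layout implied by the indices above (epoch at 0, loss at 3, training error at 4, test error at 6):

    # Assumed layout, inferred from the indices above; `log_lines` is a
    # hypothetical list of lines read from the training log.
    epochs, loss, training, test = [], [], [], []
    for row in log_lines:
        line = row.split()
        try:
            parsed = int(line[0]), float(line[3]), float(line[4])
        except (IndexError, ValueError):
            continue  # header, banner, or otherwise non-numeric row
        epochs.append(parsed[0])
        loss.append(parsed[1])
        training.append(parsed[2])
        if len(line) > 6:
            test.append(float(line[6]))  # test column present only for test runs

The length check keeps the first three lists aligned without re-appending values that the try block already stored.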

@@ -280,7 +284,8 @@ def plot_atomic_features(
raise NotImplementedError

if backend == "seaborn":
-        # This hack is needed because it seems plotly import overwrite everything.
+        # This hack is needed because the plotly import seems to overwrite
+        # everything.
import matplotlib.pyplot as plt

axis = ["x", "y", "z"]
@@ -372,7 +377,7 @@ def plot_atomic_features(
)
dim_reduction = make_pipeline(preprocessor, dim_reduction)

-        tsne_result = tsne.fit_transform(full_ls)
+        tsne_result = dim_reduction.fit_transform(full_ls)

to_pandas = []

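This fixes a stale name: once the reducer is wrapped in a pipeline, fit_transform must be called on the pipeline rather than on the old tsne object, or the preprocessor is bypassed. A runnable sketch of the pattern, with illustrative scaler and reducer choices:

    import numpy as np
    from sklearn.manifold import TSNE
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    full_ls = np.random.rand(100, 8)  # stand-in for the latent-space matrix

    preprocessor = StandardScaler()
    dim_reduction = make_pipeline(preprocessor, TSNE(n_components=2))

    # The pipeline scales first, then embeds.
    tsne_result = dim_reduction.fit_transform(full_ls)
    print(tsne_result.shape)  # (100, 2)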
