New general improvements.
- atomistic.models.neuralnetwork: New `get_activations` function to get
  the activations of a neural network.
- Fixed the visualization module.
- Updated the install documentation.
- Bumped version for release.
muammar committed Mar 6, 2020
1 parent 85681e9 commit 35b3971
Showing 6 changed files with 107 additions and 18 deletions.
2 changes: 1 addition & 1 deletion bin/ml4chem
@@ -41,7 +41,7 @@ def main(**args):
plot_atomic_features(_file, method=method, backend=backend)

else:
-        raise NotImplementedError
+        raise NotImplementedError(f"Supported values are {training_plots}")

if __name__ == "__main__":
main()
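The rewritten exception now reports the accepted values rather than raising a bare NotImplementedError. A minimal sketch of the dispatch pattern, with a hypothetical `training_plots` tuple (the real values are defined elsewhere in bin/ml4chem):

    # Hypothetical names; the actual supported values live elsewhere in bin/ml4chem.
    training_plots = ("training", "loss")

    def make_plot(plot_type):
        if plot_type in training_plots:
            ...  # dispatch to the matching plotting helper
        else:
            # Naming the supported values makes the failure actionable.
            raise NotImplementedError(f"Supported values are {training_plots}")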
2 changes: 1 addition & 1 deletion docs/source/install.rst
@@ -41,7 +41,7 @@ for ML4chem.
2. Install the requirements::

cd ml4chem
-   pip install -r requirements.txt
+   python3 -m pip install -r requirements.txt

3. After requirements are installed, you can proceed to add ``ml4chem`` to
your ``PYTHONPATH`` and ``PATH`` (to use the ``ml4chem`` command line
2 changes: 1 addition & 1 deletion ml4chem/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.8-dev"
+__version__ = "0.0.8"
106 changes: 95 additions & 11 deletions ml4chem/atomistic/models/neuralnetwork.py
@@ -5,12 +5,14 @@
import torch

import numpy as np
import pandas as pd
from collections import OrderedDict
from ml4chem.metrics import compute_rmse
from ml4chem.atomistic.models.base import DeepLearningModel, DeepLearningTrainer
from ml4chem.atomistic.models.loss import AtomicMSELoss
-from ml4chem.optim.handler import get_optimizer, get_lr_scheduler
+from ml4chem.optim.handler import get_optimizer, get_lr_scheduler, get_lr
from ml4chem.utils import convert_elapsed_time, get_chunks, get_number_of_parameters
from pprint import pformat


# Setting precision and starting logger object
@@ -208,6 +210,81 @@ def forward(self, X):
outputs = torch.stack(outputs)
return outputs

    def get_activations(self, images, model=None, numpy=True):
        """Get the activations of each hidden layer

        This function extracts the activations of each hidden layer of
        the neural network.

        Parameters
        ----------
        images : dict
            Images with structure hash, features.
        model : object
            A ML4Chem model object.
        numpy : bool
            Whether we want numpy arrays or tensors.

        Returns
        -------
        activations : DataFrame
            A DataFrame with the activations of each layer.
        """

        activations = []
        columns = ["Hash", "atom.index", "atom.symbol"]

        if model is None:
            model = self

        model.eval()

        for hash, data in images.items():
            for index, (symbol, features) in enumerate(data):

                counter = 0
                layer_counter = 0
                for l, layer in enumerate(model.linears[symbol].modules()):
                    if isinstance(layer, torch.nn.Linear) and counter == 0:
                        x = layer(features)

                        if numpy:
                            data_ = [hash, index, symbol, x.detach().numpy()]
                        else:
                            data_ = [hash, index, symbol, x]

                        layer_column_name = f"layer{layer_counter}"

                        if layer_column_name not in columns:
                            columns.append(layer_column_name)

                        counter += 1
                        layer_counter += 1

                    elif isinstance(layer, torch.nn.Linear) and counter > 0:
                        x = layer(x)

                        if numpy:
                            data_.append(x.detach().numpy())
                        else:
                            data_.append(x)

                        layer_column_name = f"layer{layer_counter}"
                        if layer_column_name not in columns:
                            columns.append(layer_column_name)

                        counter += 1
                        layer_counter += 1

                activations.append(data_)
                del data_

        # Create a DataFrame from the list of per-atom activations.
        df = pd.DataFrame(activations, columns=columns)

        return df
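A brief usage sketch for `get_activations`, assuming `model` is a trained instance of this class and `features` maps each image hash to a list of (symbol, features) pairs, as the loop above expects; both names are illustrative:

    # Illustrative names; `model` and `features` are assumed to exist already.
    df = model.get_activations(features, numpy=True)

    # One row per atom; the layerN columns hold that atom's hidden-layer outputs.
    print(df[["Hash", "atom.index", "atom.symbol", "layer0"]].head())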


class train(DeepLearningTrainer):
"""Train the model
@@ -291,6 +368,19 @@ def __init__(

self.initial_time = time.time()

if lossfxn is None:
lossfxn = AtomicMSELoss

logger.info("")
logger.info("Training")
logger.info("========")
logger.info(f"Convergence criteria: {convergence}")
logger.info(f"Loss function: {lossfxn.__name__}")
if uncertainty is not None:
logger.info("Options:")
logger.info(f" - Uncertainty penalization: {pformat(uncertainty)}")
logger.info("")

atoms_per_image = data.atoms_per_image

if batch_size is None:
@@ -309,7 +399,7 @@
for u in uncertainty
]

logger.info(" ")
logger.info("")
logging.info("Batch Information")
logging.info("-----------------")
logging.info("Number of batches: {}.".format(len(chunks)))
@@ -362,6 +452,7 @@
self.epochs = epochs
self.model = model
self.lr_scheduler = lr_scheduler
self.lossfxn = lossfxn
self.checkpoint = checkpoint
self.test = test

@@ -375,11 +466,6 @@
else:
self.uncertainty = uncertainty

-        if lossfxn is None:
-            self.lossfxn = AtomicMSELoss
-        else:
-            self.lossfxn = lossfxn

# Let the hunger games begin...
self.trainer()

@@ -388,8 +474,6 @@ def trainer(self):

logger.info(" ")
logger.info("Starting training...\n")
-        if self.uncertainty is not None:
-            logger.info("Loss function will penalize based on uncertainties.\n")

if self.test is None:
logger.info(
@@ -500,7 +584,7 @@
)
rmse_atom_test = client.submit(
compute_rmse,
-                        *(test_predictions, test_targets, atoms_per_image_test)
+                        *(test_predictions, test_targets, atoms_per_image_test),
)

rmse_test = rmse_test.result()
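For reference, a synchronous sketch of what these two futures compute, assuming compute_rmse takes (predictions, targets) with an optional atoms_per_image argument, as the calls above suggest:

    # Sketch without the dask scaffolding; variable names mirror the ones above.
    rmse_test = compute_rmse(test_predictions, test_targets)
    rmse_atom_test = compute_rmse(test_predictions, test_targets, atoms_per_image_test)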
@@ -586,7 +670,7 @@ def closure(
lossfxn,
atoms_per_image,
device,
-                    )
+                    ),
)
)
dask.distributed.wait(accumulation)
4 changes: 2 additions & 2 deletions ml4chem/data/preprocessing.py
@@ -97,10 +97,10 @@ def set(self, purpose):
if purpose == "training" and preprocessor_name is not None:
logger.info("\nData preprocessing")
logger.info("------------------")
logger.info("Preprocessor: {}.".format(preprocessor_name))
logger.info(f"Preprocessor: {preprocessor_name}.")
logger.info("Options:")
for k, v in self.kwargs.items():
logger.info(" - {}: {}.".format(k, v))
logger.info(f" - {k}: {v}.")

logger.info(" ")

9 changes: 7 additions & 2 deletions ml4chem/visualization.py
@@ -128,6 +128,10 @@ def read_log(logfile, metric="loss", refresh=None, data_only=False):
loss.append(float(line[3]))
training.append(float(line[4]))
test.append(float(line[6]))
except IndexError:
epochs.append(int(line[0]))
loss.append(float(line[3]))
training.append(float(line[4]))
except ValueError:
pass
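The new except IndexError branch covers epoch rows written without test-set columns. A self-contained sketch of the same idea that probes the optional column with a length check, assuming the column layout implied by the indices above (epoch at 0, loss at 3, training error at 4, test error at 6):

    # Assumed layout, inferred from the indices above; `log_lines` is a
    # hypothetical list of lines read from the training log.
    epochs, loss, training, test = [], [], [], []
    for row in log_lines:
        line = row.split()
        try:
            parsed = int(line[0]), float(line[3]), float(line[4])
        except (IndexError, ValueError):
            continue  # header, banner, or otherwise non-numeric row
        epochs.append(parsed[0])
        loss.append(parsed[1])
        training.append(parsed[2])
        if len(line) > 6:
            test.append(float(line[6]))  # test column present only for test runs

The length check keeps the first three lists aligned without re-appending values that the try block already stored.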

@@ -280,7 +284,8 @@ def plot_atomic_features(
raise NotImplementedError

if backend == "seaborn":
-        # This hack is needed because it seems plotly import overwrite everything.
+        # This hack is needed because the plotly import seems to overwrite
+        # everything.
import matplotlib.pyplot as plt

axis = ["x", "y", "z"]
@@ -372,7 +377,7 @@ def plot_atomic_features(
)
dim_reduction = make_pipeline(preprocessor, dim_reduction)

-        tsne_result = tsne.fit_transform(full_ls)
+        tsne_result = dim_reduction.fit_transform(full_ls)

to_pandas = []

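This fixes a stale name: once the reducer is wrapped in a pipeline, fit_transform must be called on the pipeline rather than on the old tsne object, or the preprocessor is bypassed. A runnable sketch of the pattern, with illustrative scaler and reducer choices:

    import numpy as np
    from sklearn.manifold import TSNE
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    full_ls = np.random.rand(100, 8)  # stand-in for the latent-space matrix

    preprocessor = StandardScaler()
    dim_reduction = make_pipeline(preprocessor, TSNE(n_components=2))

    # The pipeline scales first, then embeds.
    tsne_result = dim_reduction.fit_transform(full_ls)
    print(tsne_result.shape)  # (100, 2)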
