diff --git a/README.md b/README.md
index 70bef4dc..21237cc4 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
-![PyTorch Tabular](docs/imgs/pytorch_tabular_logo.png)
+![PyTorch Tabular](docs/imgs/pytorch_tabular_logo.png)    
+
 [![pypi](https://img.shields.io/pypi/v/pytorch_tabular.svg)](https://pypi.python.org/pypi/pytorch_tabular)
 [![travis](https://img.shields.io/travis/manujosephv/pytorch_tabular.svg)](https://travis-ci.com/manujosephv/pytorch_tabular)
 [![documentation status](https://readthedocs.org/projects/pytorch_tabular/badge/?version=latest)](https://pytorch_tabular.readthedocs.io/en/latest/?badge=latest)
@@ -62,9 +63,11 @@ For complete Documentation with tutorials visit []
 
 ## Available Models
 
-* FeedForward Network with Category Embedding is a simple FF network, but with and Embedding layers for the categorical columns.
+* FeedForward Network with Category Embedding is a simple FF network, but with Embedding layers for the categorical columns.
 * [Neural Oblivious Decision Ensembles for Deep Learning on Tabular Data](https://arxiv.org/abs/1909.06312) is a model presented in ICLR 2020 and according to the authors have beaten well-tuned Gradient Boosting models on many datasets.
 * [TabNet: Attentive Interpretable Tabular Learning](https://arxiv.org/abs/1908.07442) is another model coming out of Google Research which uses Sparse Attention in multiple steps of decision making to model the output.
+* [Mixture Density Networks](https://publications.aston.ac.uk/id/eprint/373/1/NCRG_94_004.pdf) is a regression model which uses Gaussian components to approximate the target function and provide a probabilistic prediction out of the box.
+* [AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks](https://arxiv.org/abs/1810.11921) is a model which tries to learn interactions between the features in an automated way, create a better representation, and then use this representation in downstream tasks.
 
 To implement new models, see the [How to implement new models tutorial](https://github.com/manujosephv/pytorch_tabular/blob/main/docs/04-Implementing%20New%20Architectures.ipynb). It covers basic as well as advanced architectures.
 
diff --git a/pytorch_tabular/models/mixture_density/mdn.py b/pytorch_tabular/models/mixture_density/mdn.py
index 4256495d..0e917889 100644
--- a/pytorch_tabular/models/mixture_density/mdn.py
+++ b/pytorch_tabular/models/mixture_density/mdn.py
@@ -378,6 +378,9 @@ def unpack_input(self, x: Dict):
 class NODEMDN(BaseMDN):
     def __init__(self, config: DictConfig, **kwargs):
         super().__init__(config, **kwargs)
+    
+    def subset(self, x):  # bound method handed to utils.Lambda in _build_network
+        return x[..., :].mean(dim=-2)  # `[..., :]` keeps every entry; mean over dim=-2
 
     def _build_network(self):
         self.hparams.node_input_dim = (
@@ -387,10 +390,7 @@ def _build_network(self):
         # average first n channels of every tree, where n is the number of output targets for regression
         # and number of classes for classification
 
-        def subset(x):
-            return x[..., :].mean(dim=-2)
-
-        output_response = utils.Lambda(subset)
+        output_response = utils.Lambda(self.subset)
         self.backbone = nn.Sequential(backbone, output_response)
         # Adding the last layer
         self.hparams.mdn_config.input_dim = backbone.output_dim
diff --git a/pytorch_tabular/models/node/node_model.py b/pytorch_tabular/models/node/node_model.py
index 4e432b85..4c277518 100644
--- a/pytorch_tabular/models/node/node_model.py
+++ b/pytorch_tabular/models/node/node_model.py
@@ -60,6 +60,9 @@ def __init__(self, config: DictConfig, **kwargs):
         if config.embed_categorical:
             self.embedding_cat_dim = sum([y for x, y in config.embedding_dims])
         super().__init__(config, **kwargs)
+    
+    def subset(self, x):  # keep first output_dim entries of the last axis, average over dim=-2
+        return x[..., : self.hparams.output_dim].mean(dim=-2)
 
     def _build_network(self):
         if self.hparams.embed_categorical:
@@ -79,10 +82,7 @@ def _build_network(self):
         # average first n channels of every tree, where n is the number of output targets for regression
         # and number of classes for classification
 
-        def subset(x):
-            return x[..., : self.hparams.output_dim].mean(dim=-2)
-
-        self.output_response = utils.Lambda(subset)
+        self.output_response = utils.Lambda(self.subset)
 
     def unpack_input(self, x: Dict):
         if self.hparams.embed_categorical:
diff --git a/pytorch_tabular/models/node/utils.py b/pytorch_tabular/models/node/utils.py
index fa7974d1..464add88 100644
--- a/pytorch_tabular/models/node/utils.py
+++ b/pytorch_tabular/models/node/utils.py
@@ -97,8 +97,14 @@ def _threshold_and_support(input, dim=-1):
         return tau, support_size
 
 
-sparsemax = lambda input, dim=-1: SparsemaxFunction.apply(input, dim)
-sparsemoid = lambda input: (0.5 * input + 0.5).clamp_(0, 1)
+def sparsemax(input, dim=-1):
+    """Call ``SparsemaxFunction.apply`` on ``input`` with ``dim``."""
+    return SparsemaxFunction.apply(input, dim)
+
+
+def sparsemoid(input):
+    """Return ``0.5 * input + 0.5`` clamped to the range [0, 1]."""
+    return (0.5 * input + 0.5).clamp_(0, 1)
 
 
 class Entmax15Function(Function):
@@ -184,7 +190,9 @@ def _backward(output, grad_output):
         return grad_input
 
 
-entmax15 = lambda input, dim=-1: Entmax15Function.apply(input, dim)
+def entmax15(input, dim=-1):
+    """Call ``Entmax15Function.apply`` on ``input`` with ``dim``."""
+    return Entmax15Function.apply(input, dim)
 entmoid15 = Entmoid15.apply
 
 
diff --git a/tests/test_common.py b/tests/test_common.py
index e8dbef39..090a24f0 100644
--- a/tests/test_common.py
+++ b/tests/test_common.py
@@ -15,9 +15,10 @@
 )
 
 MODEL_CONFIG_SAVE_TEST = [
-    CategoryEmbeddingModelConfig,
-    AutoIntConfig,
-    TabNetModelConfig,
+    (CategoryEmbeddingModelConfig, dict(layers="10-20")),  # (config class, constructor kwargs)
+    (AutoIntConfig, dict(num_heads=1, num_attn_blocks=1)),
+    (NodeConfig, dict(num_trees=100, depth=2)),
+    (TabNetModelConfig, dict(n_a=2, n_d=2)),
 ]
 
 MODEL_CONFIG_FEATURE_EXT_TEST = [
@@ -67,7 +68,8 @@ def test_save_load(
         continuous_cols=continuous_cols,
         categorical_cols=categorical_cols,
     )
-    model_config_params = dict(task="regression")
+    model_config_class, model_config_params = model_config_class
+    model_config_params['task']="regression"
     model_config = model_config_class(**model_config_params)
     trainer_config = TrainerConfig(
         max_epochs=3, checkpoints=None, early_stopping=None, gpus=0, fast_dev_run=True