Skip to content

Commit

Permalink
Merge pull request #8 from manujosephv/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
manujosephv authored Apr 12, 2021
2 parents 10dbb12 + 6f66be2 commit 5ea29cb
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 17 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
![PyTorch Tabular](docs/imgs/pytorch_tabular_logo.png)
![PyTorch Tabular](docs/imgs/pytorch_tabular_logo.png)

[![pypi](https://img.shields.io/pypi/v/pytorch_tabular.svg)](https://pypi.python.org/pypi/pytorch_tabular)
[![travis](https://img.shields.io/travis/manujosephv/pytorch_tabular.svg)](https://travis-ci.com/manujosephv/pytorch_tabular)
[![documentation status](https://readthedocs.org/projects/pytorch_tabular/badge/?version=latest)](https://pytorch_tabular.readthedocs.io/en/latest/?badge=latest)
Expand Down Expand Up @@ -62,9 +63,11 @@ For complete Documentation with tutorials visit []

## Available Models

* FeedForward Network with Category Embedding is a simple FF network, but with and Embedding layers for the categorical columns.
* FeedForward Network with Category Embedding is a simple FF network, but with Embedding layers for the categorical columns.
* [Neural Oblivious Decision Ensembles for Deep Learning on Tabular Data](https://arxiv.org/abs/1909.06312) is a model presented at ICLR 2020 which, according to the authors, has beaten well-tuned Gradient Boosting models on many datasets.
* [TabNet: Attentive Interpretable Tabular Learning](https://arxiv.org/abs/1908.07442) is another model coming out of Google Research which uses Sparse Attention in multiple steps of decision making to model the output.
* [Mixture Density Networks](https://publications.aston.ac.uk/id/eprint/373/1/NCRG_94_004.pdf) is a regression model which uses Gaussian components to approximate the target function and provide a probabilistic prediction out of the box.
* [AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks](https://arxiv.org/abs/1810.11921) is a model which tries to learn interactions between the features in an automated way, create a better representation, and then use this representation in a downstream task.

To implement new models, see the [How to implement new models tutorial](https://github.com/manujosephv/pytorch_tabular/blob/main/docs/04-Implementing%20New%20Architectures.ipynb). It covers basic as well as advanced architectures.

Expand Down
8 changes: 4 additions & 4 deletions pytorch_tabular/models/mixture_density/mdn.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,9 @@ def unpack_input(self, x: Dict):
class NODEMDN(BaseMDN):
def __init__(self, config: DictConfig, **kwargs):
    # No NODEMDN-specific setup happens here: the config and any extra
    # keyword arguments are forwarded unchanged to the parent constructor.
    super().__init__(config, **kwargs)

def subset(self, x):
    """Average ``x`` over its second-to-last dimension.

    The ``[..., :]`` slice keeps every entry of the last dimension, so
    nothing is dropped before the mean is taken. ``self`` is not read.

    Args:
        x: value supporting ``[..., :]`` indexing and ``.mean(dim=-2)``
            (presumably the tensor produced by the backbone -- confirm
            against the caller).

    Returns:
        ``x`` reduced by the mean along ``dim=-2``.
    """
    every_channel = x[..., :]
    return every_channel.mean(dim=-2)

def _build_network(self):
self.hparams.node_input_dim = (
Expand All @@ -387,10 +390,7 @@ def _build_network(self):
# average first n channels of every tree, where n is the number of output targets for regression
# and number of classes for classification

def subset(x):
return x[..., :].mean(dim=-2)

output_response = utils.Lambda(subset)
output_response = utils.Lambda(self.subset)
self.backbone = nn.Sequential(backbone, output_response)
# Adding the last layer
self.hparams.mdn_config.input_dim = backbone.output_dim
Expand Down
8 changes: 4 additions & 4 deletions pytorch_tabular/models/node/node_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ def __init__(self, config: DictConfig, **kwargs):
if config.embed_categorical:
self.embedding_cat_dim = sum([y for x, y in config.embedding_dims])
super().__init__(config, **kwargs)

def subset(self, x):
    """Keep the leading ``output_dim`` channels and average over ``dim=-2``.

    Slices the last dimension of ``x`` down to its first
    ``self.hparams.output_dim`` entries, then takes the mean along the
    second-to-last dimension.

    Args:
        x: value supporting ``[..., :n]`` indexing and ``.mean(dim=-2)``
            (presumably the stacked per-tree responses -- confirm against
            the caller).

    Returns:
        The sliced input reduced by the mean along ``dim=-2``.
    """
    n_outputs = self.hparams.output_dim
    leading_channels = x[..., :n_outputs]
    return leading_channels.mean(dim=-2)

def _build_network(self):
if self.hparams.embed_categorical:
Expand All @@ -79,10 +82,7 @@ def _build_network(self):
# average first n channels of every tree, where n is the number of output targets for regression
# and number of classes for classification

def subset(x):
return x[..., : self.hparams.output_dim].mean(dim=-2)

self.output_response = utils.Lambda(subset)
self.output_response = utils.Lambda(self.subset)

def unpack_input(self, x: Dict):
if self.hparams.embed_categorical:
Expand Down
14 changes: 11 additions & 3 deletions pytorch_tabular/models/node/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,14 @@ def _threshold_and_support(input, dim=-1):
return tau, support_size


sparsemax = lambda input, dim=-1: SparsemaxFunction.apply(input, dim)
sparsemoid = lambda input: (0.5 * input + 0.5).clamp_(0, 1)
def sparsemax(input, dim=-1):
    """Functional wrapper around ``SparsemaxFunction.apply``.

    Args:
        input: value forwarded as the first argument of
            ``SparsemaxFunction.apply``.
        dim: dimension forwarded as the second argument; defaults to ``-1``.

    Returns:
        Whatever ``SparsemaxFunction.apply(input, dim)`` returns.
    """
    result = SparsemaxFunction.apply(input, dim)
    return result


def sparsemoid(input):
    """Map ``input`` through ``0.5 * input + 0.5`` and clamp to ``[0, 1]``.

    The clamp is done in place, but on the freshly computed result of the
    arithmetic, so the caller's ``input`` is not modified.

    Args:
        input: value supporting scalar arithmetic and ``.clamp_``
            (e.g. a ``torch.Tensor``).

    Returns:
        The shifted-and-scaled value, limited to the range ``[0, 1]``.
    """
    shifted = 0.5 * input + 0.5
    return shifted.clamp_(0, 1)
# sparsemax = lambda input, dim=-1: SparsemaxFunction.apply(input, dim)
# sparsemoid = lambda input: (0.5 * input + 0.5).clamp_(0, 1)


class Entmax15Function(Function):
Expand Down Expand Up @@ -184,7 +190,9 @@ def _backward(output, grad_output):
return grad_input


entmax15 = lambda input, dim=-1: Entmax15Function.apply(input, dim)
def entmax15(input, dim=-1):
    """Functional wrapper around ``Entmax15Function.apply``.

    Args:
        input: value forwarded as the first argument of
            ``Entmax15Function.apply``.
        dim: dimension forwarded as the second argument; defaults to ``-1``.

    Returns:
        Whatever ``Entmax15Function.apply(input, dim)`` returns.
    """
    result = Entmax15Function.apply(input, dim)
    return result
# entmax15 = lambda input, dim=-1: Entmax15Function.apply(input, dim)
entmoid15 = Entmoid15.apply


Expand Down
10 changes: 6 additions & 4 deletions tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@
)

MODEL_CONFIG_SAVE_TEST = [
CategoryEmbeddingModelConfig,
AutoIntConfig,
TabNetModelConfig,
(CategoryEmbeddingModelConfig, dict(layers="10-20")),
(AutoIntConfig, dict(num_heads=1,num_attn_blocks=1,)),
(NodeConfig, dict(num_trees=100, depth=2)),
(TabNetModelConfig, dict(n_a=2, n_d=2)),
]

MODEL_CONFIG_FEATURE_EXT_TEST = [
Expand Down Expand Up @@ -67,7 +68,8 @@ def test_save_load(
continuous_cols=continuous_cols,
categorical_cols=categorical_cols,
)
model_config_params = dict(task="regression")
model_config_class, model_config_params = model_config_class
model_config_params['task']="regression"
model_config = model_config_class(**model_config_params)
trainer_config = TrainerConfig(
max_epochs=3, checkpoints=None, early_stopping=None, gpus=0, fast_dev_run=True
Expand Down

0 comments on commit 5ea29cb

Please sign in to comment.