Skip to content

Commit

Permalink
Move utility functions to utils.py and tests accordingly.
Browse files Browse the repository at this point in the history
  • Loading branch information
pvk-developer committed Oct 18, 2024
1 parent b3b9147 commit d8de00b
Show file tree
Hide file tree
Showing 23 changed files with 698 additions and 687 deletions.
252 changes: 0 additions & 252 deletions copulas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,268 +1,16 @@
# -*- coding: utf-8 -*-

"""Top-level package for Copulas."""

__author__ = 'DataCebo, Inc.'
__email__ = '[email protected]'
__version__ = '0.11.2.dev0'

import contextlib
import functools
import importlib
import sys
import warnings
from copy import deepcopy
from importlib.metadata import entry_points
from operator import attrgetter
from types import ModuleType

import numpy as np
import pandas as pd

EPSILON = np.finfo(np.float32).eps


class NotFittedError(Exception):
    """Exception subclass named ``NotFittedError``.

    NOTE(review): presumably raised when a model is used before being
    fitted -- confirm against the raise sites.
    """


@contextlib.contextmanager
def set_random_state(random_state, set_model_random_state):
    """Context manager for managing the random state.

    On entry, numpy's global random state is saved and replaced with the
    state of ``random_state``. On exit, even if the body raises, the global
    state reached inside the block is copied into a new
    ``np.random.RandomState``, passed to ``set_model_random_state``, and the
    saved global state is restored.

    Args:
        random_state (int or np.random.RandomState):
            The random seed or RandomState. An integer seed is converted to a
            ``RandomState`` seeded with that value.
        set_model_random_state (function):
            Function to set the random state on the model. It is called with
            a single ``np.random.RandomState`` argument.
    """
    # The documented contract allows a bare integer seed; normalize it so the
    # ``get_state`` call below works for both accepted input types.
    if isinstance(random_state, (int, np.integer)):
        random_state = np.random.RandomState(seed=random_state)

    original_state = np.random.get_state()

    np.random.set_state(random_state.get_state())

    try:
        yield
    finally:
        # Hand the advanced state back to the model so that consecutive calls
        # continue the sequence instead of replaying it.
        current_random_state = np.random.RandomState()
        current_random_state.set_state(np.random.get_state())
        set_model_random_state(current_random_state)
        np.random.set_state(original_state)


def random_state(function):
    """Set the random state before calling the function.

    The returned wrapper reads ``self.random_state``. When it is ``None`` the
    wrapped function is called directly; otherwise the call runs inside
    ``set_random_state(self.random_state, self.set_random_state)``.

    Args:
        function (Callable):
            The function to wrap around. It must be a method, since the
            wrapper reads attributes from its first argument (``self``).

    Returns:
        Callable: The wrapped method, keeping the original name and docstring.
    """

    # ``functools.wraps`` keeps ``__name__`` / ``__doc__`` of the decorated
    # method so introspection and generated docs still describe it.
    @functools.wraps(function)
    def wrapper(self, *args, **kwargs):
        if self.random_state is None:
            return function(self, *args, **kwargs)

        with set_random_state(self.random_state, self.set_random_state):
            return function(self, *args, **kwargs)

    return wrapper


def validate_random_state(random_state):
    """Validate random state argument.

    Args:
        random_state (int, numpy.integer, numpy.random.RandomState, or None):
            Seed or RandomState for the random generator.

    Returns:
        numpy.random.RandomState or None:
            ``None`` when ``random_state`` is ``None``, the same object when
            it already is a ``RandomState``, or a new ``RandomState`` seeded
            with the given integer.

    Raises:
        TypeError:
            If ``random_state`` is none of the supported types.
    """
    if random_state is None:
        return None

    if isinstance(random_state, np.random.RandomState):
        return random_state

    # numpy integer scalars (e.g. ``np.int64``) are not ``int`` instances but
    # are equally valid seeds, so both are accepted.
    if isinstance(random_state, (int, np.integer)):
        return np.random.RandomState(seed=random_state)

    raise TypeError(
        f'`random_state` {random_state} expected to be an int '
        'or `np.random.RandomState` object.'
    )


def get_instance(obj, **kwargs):
    """Build a fresh instance from a dotted name, a class, or an instance.

    Args:
        obj (str, type, instance):
            * ``str``: split at the last dot into a module path and an
              attribute name; the module is imported and the attribute is
              called with ``kwargs``.
            * ``type``: called with ``kwargs``.
            * anything else: treated as an instance. Its class is called with
              ``kwargs`` when any are given; otherwise with the ``__args__``
              and ``__kwargs__`` attributes of ``obj`` (defaulting to no
              arguments when those attributes are absent).
        **kwargs:
            Keyword arguments forwarded to the constructor.

    Returns:
        The newly created object.
    """
    if isinstance(obj, str):
        module_path, attribute = obj.rsplit('.', 1)
        factory = getattr(importlib.import_module(module_path), attribute)
        return factory(**kwargs)

    if isinstance(obj, type):
        return obj(**kwargs)

    # ``obj`` is an instance: explicit kwargs take precedence over any
    # arguments recorded on the instance.
    if kwargs:
        return obj.__class__(**kwargs)

    stored_args = getattr(obj, '__args__', ())
    stored_kwargs = getattr(obj, '__kwargs__', {})
    return obj.__class__(*stored_args, **stored_kwargs)


def store_args(__init__):
    """Save ``*args`` and ``**kwargs`` used in the ``__init__`` of a copula.

    Deep copies of the arguments are taken before the wrapped ``__init__``
    runs and are stored afterwards on the instance as ``__args__`` and
    ``__kwargs__``.

    Args:
        __init__(callable): ``__init__`` function to store their arguments.

    Returns:
        callable: Decorated ``__init__`` function, keeping the name and
        docstring of the original.
    """

    # ``functools.wraps`` keeps the original ``__init__`` metadata visible to
    # introspection instead of a generic ``(*args, **kwargs)`` wrapper.
    @functools.wraps(__init__)
    def new__init__(self, *args, **kwargs):
        # Copy first: the wrapped ``__init__`` may mutate what it receives.
        args_copy = deepcopy(args)
        kwargs_copy = deepcopy(kwargs)
        __init__(self, *args, **kwargs)
        self.__args__ = args_copy
        self.__kwargs__ = kwargs_copy

    return new__init__


def get_qualified_name(_object):
"""Return the Fully Qualified Name from an instance or class."""
module = _object.__module__
if hasattr(_object, '__name__'):
_class = _object.__name__

else:
_class = _object.__class__.__name__

return module + '.' + _class


def vectorize(function):
    """Allow a method that only accepts scalars to accept vectors too.

    Inputs that are not ``numpy.ndarray`` instances are passed straight
    through to ``function``. For arrays, the behavior depends on the
    dimensionality:

    **1-d array**

    ``function`` is assumed to have the signature::

        function(self, X, *args, **kwargs)

    where ``X`` is a scalar. The array is reshaped to a single column, so the
    elements are passed one at a time; input and output both have shape
    ``(n,)``.

    **2-d array**

    ``function`` is assumed to have the signature::

        function(self, X0, ..., Xj, *args, **kwargs)

    where each ``Xi`` is a scalar. Every row is unpacked into positional
    arguments for one call. The input is expected to have shape ``(n, j)``
    and the output has shape ``(n,)``.

    For array inputs the result is a ``float64`` ``numpy.ndarray``.

    Args:
        function(callable): Function that only accepts and returns scalars.

    Returns:
        callable: Decorated function that can accept and return
        :attr:`numpy.array`.

    Raises:
        ValueError: When the decorated function receives an array whose
        number of dimensions is neither 1 nor 2.
    """

    def decorated(self, X, *args, **kwargs):
        if not isinstance(X, np.ndarray):
            return function(self, X, *args, **kwargs)

        # A 1-d input is treated as a column so each row holds one scalar.
        rows = X.reshape([-1, 1]) if len(X.shape) == 1 else X

        if len(rows.shape) != 2:
            raise ValueError('Arrays of dimensionality higher than 2 are not supported.')

        results = (function(self, *row, *args, **kwargs) for row in rows)
        return np.fromiter(results, np.dtype('float64'))

    decorated.__doc__ = function.__doc__
    return decorated


def scalarize(function):
    """Allow methods that only accept 1-d vectors to work with scalars.

    A non-``numpy.ndarray`` input is wrapped in a one-element array before
    the call and the first element of the result is returned. Array inputs
    are passed through and their result is returned unchanged.

    Args:
        function(callable): Function that accepts and returns vectors.

    Returns:
        callable: Decorated function that accepts and returns scalars.
    """

    def decorated(self, X, *args, **kwargs):
        if isinstance(X, np.ndarray):
            return function(self, X, *args, **kwargs)

        # Scalar input: wrap it for the call, then unwrap the single result.
        wrapped_result = function(self, np.array([X]), *args, **kwargs)
        return wrapped_result[0]

    decorated.__doc__ = function.__doc__
    return decorated


def check_valid_values(function):
    """Raise an exception if the given values are not supported.

    The decorated method's first argument ``X`` is validated and then passed
    on unchanged. A ``pandas.DataFrame`` is checked through ``to_numpy()``,
    objects exposing ``dtype`` are checked as they are, and anything else
    (for example a plain list) is checked through ``numpy.asarray``.

    Args:
        function(callable): Method whose unique argument is a numpy.array-like object.

    Returns:
        callable: Decorated function, keeping the name and docstring of the
        original.

    Raises:
        ValueError: If there are missing or invalid values or if the dataset is empty.
    """

    @functools.wraps(function)
    def decorated(self, X, *args, **kwargs):
        if isinstance(X, pd.DataFrame):
            values = X.to_numpy()
        elif hasattr(X, 'dtype'):
            values = X
        else:
            # Plain sequences have no ``dtype``; convert only for validation
            # so the wrapped method still receives the caller's object.
            values = np.asarray(X)

        if not len(values):
            raise ValueError('Your dataset is empty.')

        dtype = values.dtype
        if not (np.issubdtype(dtype, np.floating) or np.issubdtype(dtype, np.integer)):
            raise ValueError('There are non-numerical values in your data.')

        if np.isnan(values).any():
            raise ValueError('There are nan values in your data.')

        return function(self, X, *args, **kwargs)

    return decorated


def _get_addon_target(addon_path_name):
"""Find the target object for the add-on.
Expand Down
2 changes: 1 addition & 1 deletion copulas/bivariate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import pandas as pd

from copulas import EPSILON
from copulas.utils import EPSILON
from copulas.bivariate.base import Bivariate, CopulaTypes
from copulas.bivariate.clayton import Clayton
from copulas.bivariate.frank import Frank
Expand Down
3 changes: 2 additions & 1 deletion copulas/bivariate/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
from scipy import stats
from scipy.optimize import brentq

from copulas import EPSILON, NotFittedError, random_state, validate_random_state
from copulas.bivariate.utils import split_matrix
from copulas.errors import NotFittedError
from copulas.utils import EPSILON, random_state, validate_random_state


class CopulaTypes(Enum):
Expand Down
2 changes: 1 addition & 1 deletion copulas/bivariate/frank.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
import scipy.integrate as integrate
from scipy.optimize import least_squares

from copulas import EPSILON
from copulas.bivariate.base import Bivariate, CopulaTypes
from copulas.bivariate.utils import split_matrix
from copulas.utils import EPSILON

MIN_FLOAT_LOG = np.log(sys.float_info.min)
MAX_FLOAT_LOG = np.log(sys.float_info.max)
Expand Down
2 changes: 1 addition & 1 deletion copulas/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pandas as pd
from scipy import stats

from copulas import set_random_state, validate_random_state
from copulas.utils import set_random_state, validate_random_state


def _dummy_fn(state):
Expand Down
5 changes: 5 additions & 0 deletions copulas/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Copulas Exceptions."""


class NotFittedError(Exception):
    """Exception subclass named ``NotFittedError``.

    NOTE(review): presumably raised when a model is used before being
    fitted -- confirm against the raise sites.
    """
3 changes: 2 additions & 1 deletion copulas/multivariate/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

import numpy as np

from copulas import NotFittedError, get_instance, validate_random_state
from copulas.errors import NotFittedError
from copulas.utils import get_instance, validate_random_state


class Multivariate(object):
Expand Down
6 changes: 3 additions & 3 deletions copulas/multivariate/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
import pandas as pd
from scipy import stats

from copulas import (
from copulas.multivariate.base import Multivariate
from copulas.univariate import GaussianUnivariate, Univariate
from copulas.utils import (
EPSILON,
check_valid_values,
get_instance,
Expand All @@ -16,8 +18,6 @@
store_args,
validate_random_state,
)
from copulas.multivariate.base import Multivariate
from copulas.univariate import GaussianUnivariate, Univariate

LOGGER = logging.getLogger(__name__)
DEFAULT_DISTRIBUTION = Univariate
Expand Down
2 changes: 1 addition & 1 deletion copulas/multivariate/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
import numpy as np
import scipy

from copulas import EPSILON, get_qualified_name
from copulas.bivariate.base import Bivariate
from copulas.multivariate.base import Multivariate
from copulas.utils import EPSILON, get_qualified_name

LOGGER = logging.getLogger(__name__)

Expand Down
10 changes: 5 additions & 5 deletions copulas/multivariate/vine.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@
import numpy as np
import pandas as pd

from copulas import (
from copulas.bivariate.base import Bivariate, CopulaTypes
from copulas.multivariate.base import Multivariate
from copulas.multivariate.tree import Tree, get_tree
from copulas.univariate.gaussian_kde import GaussianKDE
from copulas.utils import (
EPSILON,
check_valid_values,
get_qualified_name,
random_state,
store_args,
validate_random_state,
)
from copulas.bivariate.base import Bivariate, CopulaTypes
from copulas.multivariate.base import Multivariate
from copulas.multivariate.tree import Tree, get_tree
from copulas.univariate.gaussian_kde import GaussianKDE

LOGGER = logging.getLogger(__name__)

Expand Down
6 changes: 3 additions & 3 deletions copulas/univariate/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@

import numpy as np

from copulas import (
NotFittedError,
from copulas.errors import NotFittedError
from copulas.univariate.selection import select_univariate
from copulas.utils import (
get_instance,
get_qualified_name,
random_state,
store_args,
validate_random_state,
)
from copulas.univariate.selection import select_univariate


class ParametricType(Enum):
Expand Down
Loading

0 comments on commit d8de00b

Please sign in to comment.