pymc-devs · xieyj17 · Dec 31, 2023 · Jan 1, 2024 · Jan 1, 2024 · Jan 3, 2024
diff --git a/pymc_experimental/distributions/__init__.py b/pymc_experimental/distributions/__init__.py
@@ -17,7 +17,12 @@
 Experimental probability distributions for stochastic nodes in PyMC.
 """
 
-from pymc_experimental.distributions.continuous import Chi, GenExtreme, Maxwell
+from pymc_experimental.distributions.continuous import (
+    Chi,
+    GenExtreme,
+    GenPareto,
+    Maxwell,
+)
 from pymc_experimental.distributions.discrete import (
     BetaNegativeBinomial,
     GeneralizedPoisson,
@@ -32,6 +37,7 @@
     "DiscreteMarkovChain",
     "GeneralizedPoisson",
     "GenExtreme",
+    "GenPareto",
     "R2D2M2CP",
     "Skellam",
     "histogram_approximation",

diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py
@@ -221,6 +221,163 @@ def moment(rv, size, mu, sigma, xi):
         return mode
 
 
+# Generalized Pareto Distribution
+class GenParetoRV(RandomVariable):
-class GenParetoRV(RandomVariable):
+class GenParetoRV(ScipyRandomVariable):
-class GenParetoRV(RandomVariable):
+class GenParetoRV(ScipyRandomVariable):
+    name: str = "Generalized Pareto Distribution"
-    name: str = "Generalized Pareto Distribution"
+    name: str = "Generalized Pareto"
-    name: str = "Generalized Pareto Distribution"
+    name: str = "Generalized Pareto"
+    ndim_supp: int = 0
+    ndims_params: List[int] = [0, 0, 0]
+    dtype: str = "floatX"
+    _print_name: Tuple[str, str] = ("Generalized Pareto Distribution", "\\operatorname{GP}")
-    _print_name: Tuple[str, str] = ("Generalized Pareto Distribution", "\\operatorname{GP}")
+    _print_name: Tuple[str, str] = ("Generalized Pareto", "\\operatorname{GenPareto}")
-    _print_name: Tuple[str, str] = ("Generalized Pareto Distribution", "\\operatorname{GP}")
+    _print_name: Tuple[str, str] = ("Generalized Pareto", "\\operatorname{GenPareto}")
+
+    def __call__(self, mu=0.0, sigma=1.0, xi=1.0, size=None, **kwargs) -> TensorVariable:
+        return super().__call__(mu, sigma, xi, size=size, **kwargs)
-    def __call__(self, mu=0.0, sigma=1.0, xi=1.0, size=None, **kwargs) -> TensorVariable:
-        return super().__call__(mu, sigma, xi, size=size, **kwargs)
-    def __call__(self, mu=0.0, sigma=1.0, xi=1.0, size=None, **kwargs) -> TensorVariable:
-        return super().__call__(mu, sigma, xi, size=size, **kwargs)
+
+    @classmethod
+    def rng_fn(
+        cls,
+        rng: Union[np.random.RandomState, np.random.Generator],
+        mu: np.ndarray,
+        sigma: np.ndarray,
+        xi: np.ndarray,
+        size: Tuple[int, ...],
+    ) -> np.ndarray:
+        # using scipy's parameterization
+        return stats.genpareto.rvs(c=xi, loc=mu, scale=sigma, random_state=rng, size=size)
+
+
+gp = GenParetoRV()
-gp = GenParetoRV()
+gen_pareto = GenParetoRV()
-gp = GenParetoRV()
+gen_pareto = GenParetoRV()
+
+
+class GenPareto(Continuous):
+    r"""
+    The Generalized Pareto Distribution
+
+    The pdf of this distribution is
+
+    .. math::
+
+       f(x \mid \mu, \sigma, \xi) = \frac{1}{\sigma} (1 + \xi z)^{-1/\xi-1}
+
+    where
+
+    .. math::
+
+        z = \frac{x - \mu}{\sigma}
+
+    and is defined on the set (when :math:`\xi \geq 0`):
+
+    .. math::
+
+        \left\{x: x \geq \mu \right\}
+    .. plot::
+
+        import matplotlib.pyplot as plt
+        import numpy as np
+        import scipy.stats as st
+        import arviz as az
+        plt.style.use('arviz-darkgrid')
+        x = np.linspace(-2, 10, 200)
+        mus = [0., 0., 0., 0., 1., ]
+        sigmas = [1., 1., 1.,2., 1., ]
+        xis = [1., 0., 5., 1., 1.,  ]
+        for mu, sigma, xi in zip(mus, sigmas, xis):
+            pdf = st.genpareto.pdf(x, c=xi, loc=mu, scale=sigma)
+            plt.plot(x, pdf, label=rf'$\mu$ = {mu}, $\sigma$ = {sigma}, $\xi$={xi}')
+        plt.xlabel('x', fontsize=12)
+        plt.ylabel('f(x)', fontsize=12)
+        plt.legend(loc=1)
+        plt.show()
+
+
+    ========  =========================================================================
+    Support   * :math:`x \geq \mu`, when :math:`\xi \geq 0`
+    Mean      * :math:`\mu + \frac{\sigma}{1-\xi}`, when :math:`\xi < 1`
+    Variance  * :math:`\frac{\sigma^2}{(1-\xi)^2 (1-2\xi)}`, when :math:`\xi < 0.5`
+    ========  =========================================================================
+
+    Parameters
+    ----------
+    mu : float
+        Location parameter.
+    sigma : float
+        Scale parameter (sigma > 0).
+    xi : float
+        Shape parameter (xi >= 0)
+    """
+
+    rv_op = gp
+
+    @classmethod
+    def dist(cls, mu=0, sigma=1, xi=0, **kwargs):
+        mu = pt.as_tensor_variable(floatX(mu))
+        sigma = pt.as_tensor_variable(floatX(sigma))
+        xi = pt.as_tensor_variable(floatX(xi))
+
+        return super().dist([mu, sigma, xi], **kwargs)
+
+    def logp(value, mu, sigma, xi):
+        """
+        Calculate log-probability of Generalized Pareto distribution
+        at specified value.
+
+        Parameters
+        ----------
+        value: numeric
+            Value(s) for which log-probability is calculated. If the log probabilities for multiple
+            values are desired the values must be provided in a numpy array or Pytensor tensor
+
+        Returns
+        -------
+        TensorVariable
+        """
+
+        scaled = (value - mu) / sigma
+
+        logp_expression = pt.switch(
+            pt.eq(xi, 0),
+            -1 * scaled,
+            -1 * pt.log(sigma) - ((xi + 1) / xi) * pt.log1p(xi * scaled),
+        )
+        logp = pt.switch(pt.ge(scaled, 0), logp_expression, -np.inf)
+        return check_parameters(logp, sigma > 0, xi >= 0, msg="sigma > 0 and xi >= 0")
+
+    def logcdf(value, mu, sigma, xi):
+        """
+        Compute the log of the cumulative distribution function for Generalized Pareto
+        distribution at the specified value.
+
+        Parameters
+        ----------
+        value: numeric or np.ndarray or `TensorVariable`
+            Value(s) for which log CDF is calculated. If the log CDF for
+            multiple values are desired the values must be provided in a numpy
+            array or `TensorVariable`.
+
+        Returns
+        -------
+        TensorVariable
+        """
+        scaled = (value - mu) / sigma
+        logc_expression = pt.switch(
+            pt.eq(xi, 0),
+            pt.log(1 - pt.exp(-1 * scaled)),
+            pt.log(1 - pt.pow((1 + xi * scaled), (-1 / xi))),
+        )
+        logc = pt.switch(pt.ge(scaled, 0), logc_expression, -np.inf)
+
+        return check_parameters(logc, sigma > 0, xi >= 0, msg="sigma > 0 and xi >= 0")
+
+    def moment(rv, size, mu, sigma, xi):
+        r"""
+        Mean is defined when :math:`\xi < 1`
+        """
+        mean_expression = mu + sigma / (1 - xi)
+        mean = pt.switch(xi < 1, mean_expression, np.inf)
+        if not rv_size_is_none(size):
+            mean = pt.full(size, mean)
+        return check_parameters(mean, xi < 1, msg="xi < 1")
+
+
 class Chi:
     r"""
     :math:`\chi` log-likelihood.

diff --git a/pymc_experimental/model/marginal_model.py b/pymc_experimental/model/marginal_model.py
@@ -84,7 +84,7 @@ class MarginalModel(Model):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.marginalized_rvs = []
-        self._marginalized_named_vars_to_dims = treedict()
+        self._marginalized_named_vars_to_dims = {}
 
     def _delete_rv_mappings(self, rv: TensorVariable) -> None:
         """Remove all model mappings referring to rv

diff --git a/pymc_experimental/tests/distributions/test_continuous.py b/pymc_experimental/tests/distributions/test_continuous.py
@@ -33,7 +33,7 @@
 )
 
 # the distributions to be tested
-from pymc_experimental.distributions import Chi, GenExtreme, Maxwell
+from pymc_experimental.distributions import Chi, GenExtreme, GenPareto, Maxwell
 
 
 class TestGenExtremeClass:
@@ -138,6 +138,64 @@ class TestGenExtreme(BaseTestDistributionRandom):
     ]
 
 
+class TestGenParetoClass:
+    """
+    Wrapper class so that tests of experimental additions can be dropped into
+    PyMC directly on adoption.
+
+    pm.logp(GenPareto.dist(mu=0.,sigma=1.,xi=5.),value=1.)
+    """
+
+    def test_logp(self):
+        def ref_logp(value, mu, sigma, xi):
+            if xi == 0:
+                return sp.expon.logpdf((value - mu) / sigma)
+            else:
+                return sp.genpareto.logpdf(value, c=xi, loc=mu, scale=sigma)
+
+        check_logp(
+            GenPareto,
+            R,
+            {"mu": R, "sigma": Rplusbig, "xi": Rplusbig},
+            ref_logp,
+            decimal=select_by_precision(float64=6, float32=2),
+            skip_paramdomain_outside_edge_test=True,
+        )
+
+    def test_logcdf(self):
+        def ref_logc(value, mu, sigma, xi):
+            if xi == 0:
+                return sp.expon.logcdf((value - mu) / sigma)
+            else:
+                return sp.genpareto.logcdf(value, c=xi, loc=mu, scale=sigma)
+
+        check_logcdf(
+            GenPareto,
+            R,
+            {
+                "mu": R,
+                "sigma": Rplusbig,
+                "xi": Rplusbig,
+            },
+            ref_logc,
+            decimal=select_by_precision(float64=6, float32=2),
+            skip_paramdomain_outside_edge_test=True,
+        )
+
+
+class TestGenPareto(BaseTestDistributionRandom):
+    pymc_dist = GenPareto
+    pymc_dist_params = {"mu": 0, "sigma": 1, "xi": 1}
+    expected_rv_op_params = {"mu": 0, "sigma": 1, "xi": 1}
+    reference_dist_params = {"loc": 0, "scale": 1, "c": 0.1}
+    reference_dist = seeded_scipy_distribution_builder("genpareto")
+    tests_to_run = [
+        "check_pymc_params_match_rv_op",
+        "check_pymc_draws_match_reference",
+        "check_rv_size",
+    ]
+
+
 class TestChiClass:
     """
     Wrapper class so that tests of experimental additions can be dropped into