Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add BetaGeoBetaBinom Distribution Block #431

Merged
merged 35 commits into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
81c6e54
init commit
ColtAllen Nov 11, 2023
882018e
removed scan
ColtAllen Nov 11, 2023
a96929b
Fix logp
ricardoV94 Nov 14, 2023
30c39a0
Remove print statement
ricardoV94 Nov 14, 2023
777f715
Add test for logp notimplemented errors
ricardoV94 Nov 14, 2023
9b59449
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Dec 11, 2023
0aac157
docstrings
ColtAllen Dec 11, 2023
2bac71d
dev notebook added
ColtAllen Dec 11, 2023
7472fee
updated to vectorize_graph
ColtAllen Dec 16, 2023
6a0239d
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Jan 7, 2024
e70caa6
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Jan 11, 2024
2eb60ae
import order
ColtAllen Jan 12, 2024
3c07ad4
update oldest pymc_version
ColtAllen Jan 12, 2024
6cec635
Update ci.yml pymc version
ColtAllen Jan 12, 2024
c2d44b0
Update pyproject.toml pymc version
ColtAllen Jan 12, 2024
bd0fe40
Merge branch 'bgbb_dist' of https://github.com/ColtAllen/pymc-marketi…
ColtAllen Jan 12, 2024
d2d02cd
WIP sample prior testing
ColtAllen Jan 30, 2024
348691a
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Jan 30, 2024
4329380
sample prior compared against lifetimes
ColtAllen Feb 3, 2024
2423c12
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Feb 9, 2024
4040ffc
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Feb 24, 2024
d4d3455
increase rtol
ColtAllen Feb 24, 2024
bb6b893
Merge branch 'main' into bgbb_dist
ColtAllen Mar 25, 2024
22d1269
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Apr 12, 2024
618f0b3
Merge branch 'main' into bgbb_dist
ColtAllen Apr 19, 2024
f0d9588
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Apr 25, 2024
d839bee
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen Apr 29, 2024
50158a9
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen May 1, 2024
5366087
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen May 4, 2024
455b7a9
Merge branch 'pymc-labs:main' into bgbb_dist
ColtAllen May 22, 2024
c63e94e
remove commented code, add logp reference
ColtAllen May 26, 2024
60a1f55
fix latex docstring
ColtAllen May 26, 2024
783d6f5
notebook testing and misc edits
ColtAllen May 26, 2024
512aea6
revert latex in docstring
ColtAllen May 26, 2024
c376695
add ruff ignore comment
ColtAllen May 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 186 additions & 6 deletions pymc_marketing/clv/distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@
import pymc as pm
import pytensor.tensor as pt
from pymc.distributions.continuous import PositiveContinuous
from pymc.distributions.dist_math import check_parameters
from pymc.distributions.distribution import Discrete
from pymc.distributions.dist_math import betaln, check_parameters
from pytensor import scan
from pytensor.tensor.random.op import RandomVariable

__all__ = [
"ContContract",
"ContNonContract",
"ParetoNBD",
]
__all__ = ["ContContract", "ContNonContract", "ParetoNBD", "BetaGeoBetaBinom"]


class ContNonContractRV(RandomVariable):
Expand Down Expand Up @@ -460,3 +458,185 @@ def logp(value, r, alpha, s, beta, T):
beta > 0,
msg="r > 0, alpha > 0, s > 0, beta > 0",
)


class BetaGeoBetaBinomRV(RandomVariable):
name = "beta_geo_beta_binom"
ndim_supp = 1
ndims_params = [0, 0, 0, 0, 0]
dtype = "floatX"
_print_name = ("BetaGeoBetaBinom", "\\operatorname{BetaGeoBetaBinom}")

def make_node(self, rng, size, dtype, alpha, beta, gamma, delta, T):
alpha = pt.as_tensor_variable(alpha)
beta = pt.as_tensor_variable(beta)
gamma = pt.as_tensor_variable(gamma)
delta = pt.as_tensor_variable(delta)
T = pt.as_tensor_variable(T)

return super().make_node(rng, size, dtype, alpha, beta, gamma, delta, T)

def __call__(self, alpha, beta, gamma, delta, T, size=None, **kwargs):
return super().__call__(alpha, beta, gamma, delta, T, size=size, **kwargs)

@classmethod
def rng_fn(cls, rng, alpha, beta, gamma, delta, T, size) -> np.ndarray:
size = pm.distributions.shape_utils.to_tuple(size)

alpha = np.asarray(alpha)
beta = np.asarray(beta)
gamma = np.asarray(gamma)
delta = np.asarray(delta)
T = np.asarray(T)

if size == ():
size = np.broadcast_shapes(
alpha.shape, beta.shape, gamma.shape, delta.shape, T.shape
)

alpha = np.broadcast_to(alpha, size)
beta = np.broadcast_to(beta, size)
gamma = np.broadcast_to(gamma, size)
delta = np.broadcast_to(delta, size)
T = np.broadcast_to(T, size)

output = np.zeros(shape=size + (2,))

purchase_prob = rng.beta(a=alpha, b=beta, size=size)
churn_prob = rng.beta(a=delta, b=gamma, size=size)

def sim_data(purchase_prob, churn_prob, T):
t = 0
n = 0
active = True

while t <= T and active:
t += 1
active = rng.binomial(1, churn_prob)
purchase = rng.binomial(1, purchase_prob)
if active and purchase:
n += 1

return np.array(
[
t,
n,
],
)
ColtAllen marked this conversation as resolved.
Show resolved Hide resolved

for index in np.ndindex(*size):
output[index] = sim_data(purchase_prob[index], churn_prob[index], T[index])

return output

def _supp_shape_from_params(*args, **kwargs):
return (2,)


beta_geo_beta_binom = BetaGeoBetaBinomRV()


# TODO: Edit likelihood expression in docstring
class BetaGeoBetaBinom(Discrete):
r"""
Population-level distribution class for a discrete, non-contractual, Beta-Geometric/Beta-Binomial process,
based on Fader, et al. in [1]_.

.. math::

\begin{align}
\text{if }\alpha > \beta: \\
\\
\mathbb{L}(r, \alpha, s, \beta | x, t_x, T) &=
\frac{\Gamma(r+x)\alpha^r\beta}{\Gamma(r)+(\alpha +t_x)^{r+s+x}}
[(\frac{s}{r+s+x})_2F_1(r+s+x,s+1;r+s+x+1;\frac{\alpha-\beta}{\alpha+t_x}) \\
&+ (\frac{r+x}{r+s+x})
\frac{_2F_1(r+s+x,s;r+s+x+1;\frac{\alpha-\beta}{\alpha+T})(\alpha +t_x)^{r+s+x}}
{(\alpha +T)^{r+s+x}}] \\
\\
\text{if }\beta >= \alpha: \\
\\
\mathbb{L}(r, \alpha, s, \beta | x, t_x, T) &=
\frac{\Gamma(r+x)\alpha^r\beta}{\Gamma(r)+(\beta +t_x)^{r+s+x}}
[(\frac{s}{r+s+x})_2F_1(r+s+x,r+x;r+s+x+1;\frac{\beta-\alpha}{\beta+t_x}) \\
&+ (\frac{r+x}{r+s+x})
\frac{_2F_1(r+s+x,r+x+1;r+s+x+1;\frac{\beta-\alpha}{\beta+T})(\beta +t_x)^{r+s+x}}
{(\beta +T)^{r+s+x}}]
\end{align}

======== ===============================================
Support :math:`t_j > 0` for :math:`j = 1, \dots, x`
Mean :math:`\mathbb{E}[X(t) | r, \alpha, s, \beta] = \frac{r\beta}{\alpha(s-1)}[1-(\frac{\beta}{\beta + t})^{s-1}]`
======== ===============================================

References
----------
.. [1] Fader, Peter S., Bruce G.S. Hardie, and Jen Shang (2010),
"Customer-Base Analysis in a Discrete-Time Noncontractual Setting,"
Marketing Science, 29 (6), 1086-1108. https://www.brucehardie.com/papers/020/fader_et_al_mksc_10.pdf

"""
ColtAllen marked this conversation as resolved.
Show resolved Hide resolved

rv_op = beta_geo_beta_binom

@classmethod
def dist(cls, alpha, beta, gamma, delta, T, **kwargs):
return super().dist([alpha, beta, gamma, delta, T], **kwargs)

def logp(value, alpha, beta, gamma, delta, T):
t_x = value[..., 0]
x = value[..., 1]

betaln_ab = betaln(alpha, beta)
betaln_gd = betaln(gamma, delta)

A = (
betaln(alpha + x, beta + T - x)
- betaln_ab
+ betaln(gamma, delta + T)
- betaln_gd
)

# TODO: The -1 may need to be omitted for the sequences loop
t_recent = T - t_x - 1

# TODO: Resolve this scan loop and logp will be ready
def _B_iter(ix):
return betaln(alpha + x, beta + t_x - x + ix) + betaln(
gamma + 1, delta + t_x + ix
)

iter_result, _ = scan(
fn=_B_iter,
# outputs_info=pt.zeros(1),
ricardoV94 marked this conversation as resolved.
Show resolved Hide resolved
sequences=pt.arange(t_recent),
ricardoV94 marked this conversation as resolved.
Show resolved Hide resolved
)

scan_sum = iter_result.sum()

# B_sum = function(inputs=[t_recent], outputs=[scan_sum])

B = scan_sum - betaln_gd - betaln_ab

logp = pt.logaddexp(A, B)

logp = pt.switch(
pt.or_(
pt.or_(
pt.lt(t_x, 0),
pt.lt(x, 0),
),
pt.gt(t_x, T),
),
-np.inf,
logp,
)

return check_parameters(
logp,
alpha > 0,
beta > 0,
gamma > 0,
delta > 0,
msg="alpha > 0, beta > 0, gamma > 0, delta > 0",
)
120 changes: 119 additions & 1 deletion tests/clv/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@
from numpy.testing import assert_almost_equal
from pymc import Model

from pymc_marketing.clv.distributions import ContContract, ContNonContract, ParetoNBD
from pymc_marketing.clv.distributions import (
BetaGeoBetaBinom,
ContContract,
ContNonContract,
ParetoNBD,
)


class TestContNonContract:
Expand Down Expand Up @@ -253,3 +258,116 @@ def test_pareto_nbd_sample_prior(
prior = pm.sample_prior_predictive(samples=100)

assert prior["prior"]["pareto_nbd"][0].shape == (100,) + expected_size


class TestBetaGeoBetaBinom:
@pytest.mark.parametrize(
"value, alpha, beta, gamma, delta, T, logp",
[
(np.array([1, 1]), 1.204, 0.750, 0.657, 2.783, 6, np.log(0.1014)),
(
np.array([2, 2]),
[1.204, 1.204],
0.750,
0.657,
2.783,
6,
[np.log(0.0492), np.log(0.0492)],
),
(
np.array([[0, 0], [5, 1]]),
[1.204, 1.204],
0.750,
0.657,
[2.783, 2.783],
6,
[np.log(0.3111), np.log(0.0085)],
),
(
np.array([[6, 5], [3, 2], [5, 5]]),
1.204,
0.750,
0.657,
2.783,
6,
[np.log(0.0136) + np.log(0.0109) + np.log(0.0243)],
),
(
np.array([6, 6]),
1.204,
0.750,
0.657,
np.full((5, 3), 2.783),
6,
np.full((5, 3), np.log(0.1129)),
),
],
)
def test_beta_geo_beta_binom(self, value, alpha, beta, gamma, delta, T, logp):
# comparisons to lifetimes loglike difficult due to differences in array broadcasting.
# Expected logp values can be found in http://brucehardie.com/notes/010/
with Model():
beta_geo_beta_binom = BetaGeoBetaBinom(
"beta_geo_beta_binom",
alpha=alpha,
beta=beta,
gamma=gamma,
delta=delta,
T=T,
)
pt = {"beta_geo_beta_binom": value}

assert_almost_equal(
pm.logp(beta_geo_beta_binom, value).eval(),
logp,
decimal=6,
err_msg=str(pt),
)

def test_beta_geo_beta_binom_invalid(self):
beta_geo_beta_binom = BetaGeoBetaBinom.dist(
alpha=1.20, beta=0.75, gamma=0.66, delta=2.78, T=6
)
assert pm.logp(beta_geo_beta_binom, np.array([3, -1])).eval() == -np.inf
assert pm.logp(beta_geo_beta_binom, np.array([-1, 1.5])).eval() == -np.inf
assert pm.logp(beta_geo_beta_binom, np.array([11, 1.5])).eval() == -np.inf

@pytest.mark.parametrize(
"alpha_size, beta_size, gamma_size, delta_size, beta_geo_beta_binom_size, expected_size",
[
(None, None, None, None, None, (2,)),
((5,), None, None, None, None, (5, 2)),
(None, (5,), None, None, (5,), (5, 2)),
(None, None, (5, 1), (1, 3), (5, 3), (5, 3, 2)),
(None, None, None, None, (5, 3), (5, 3, 2)),
],
)
def test_beta_geo_beta_binom_sample_prior(
self,
alpha_size,
beta_size,
gamma_size,
delta_size,
beta_geo_beta_binom_size,
expected_size,
):
with Model():
alpha = pm.Gamma(name="alpha", alpha=5, beta=1, size=alpha_size)
beta = pm.Gamma(name="beta", alpha=5, beta=1, size=beta_size)
gamma = pm.Gamma(name="gamma", alpha=5, beta=1, size=gamma_size)
delta = pm.Gamma(name="delta", alpha=5, beta=1, size=delta_size)

T = pm.MutableData(name="T", value=np.array(10))

BetaGeoBetaBinom(
name="beta_geo_beta_binom",
alpha=alpha,
beta=beta,
gamma=gamma,
delta=delta,
T=T,
size=beta_geo_beta_binom_size,
)
prior = pm.sample_prior_predictive(samples=100)

assert prior["prior"]["beta_geo_beta_binom"][0].shape == (100,) + expected_size
Loading