Skip to content

Commit

Permalink
Merge branch 'pymc-labs:main' into pareto_covar
Browse files Browse the repository at this point in the history
  • Loading branch information
ColtAllen authored Feb 9, 2024
2 parents fd3d148 + a224cfe commit a63b0bd
Show file tree
Hide file tree
Showing 8 changed files with 215 additions and 40 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ci:

repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.14
rev: v0.2.0
hooks:
- id: ruff
args: ["--fix", "--show-source"]
Expand Down
26 changes: 24 additions & 2 deletions pymc_marketing/clv/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,31 @@ def _squeeze_dims(x: xarray.DataArray):

clv = xarray.DataArray(0.0)

# FIXME: This is a hotfix for ParetoNBDModel, as it has a different API from BetaGeoModel
# We should harmonize them!
from pymc_marketing.clv.models import ParetoNBDModel

if isinstance(transaction_model, ParetoNBDModel):
transaction_data = pd.DataFrame(
{
"customer_id": customer_id,
"frequency": frequency,
"recency": recency,
"T": T,
}
)

def expected_purchases(*, t, **kwargs):
return transaction_model.expected_purchases(
future_t=t,
data=transaction_data,
)
else:
expected_purchases = transaction_model.expected_num_purchases

# TODO: Vectorize computation so that we perform a single call to expected_num_purchases
prev_expected_num_purchases = _squeeze_dims(
transaction_model.expected_num_purchases(
expected_purchases(
customer_id=customer_id,
frequency=frequency,
recency=recency,
Expand All @@ -110,7 +132,7 @@ def _squeeze_dims(x: xarray.DataArray):
for i in steps * factor:
# since the prediction of number of transactions is cumulative, we have to subtract off the previous periods
new_expected_num_purchases = _squeeze_dims(
transaction_model.expected_num_purchases(
expected_purchases(
customer_id=customer_id,
frequency=frequency,
recency=recency,
Expand Down
54 changes: 54 additions & 0 deletions pymc_marketing/mmm/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,60 @@ def logistic_saturation(x, lam: Union[npt.NDArray[np.float_], float] = 0.5):
return (1 - pt.exp(-lam * x)) / (1 + pt.exp(-lam * x))


def scale_preserving_logistic_saturation(x, m: Union[npt.NDArray[np.float_], float]):
"""Scale preserving logistic transformation.
This single-parameter saturation function maps its input to the range
:math:`[0, m]`, where :math:`f(x) \leq x` for all :math:`x`. It can be interpreted as mapping
from spend to "effective spend".
Properties:
* Output is on scale with input values. Simplifies prior specification.
* Intuitive parameter interpretation: `m` is the "maximum achievable effect".
* Slope of curve never exceeds 1. Thus "effective spend" <= "actual spend".
.. math::
f(x) = m \\left( \\frac{1 - e^{-\\frac{2}{m} x}}{1 + e^{-\\frac{2}{m} x}} \\right)
.. plot::
:context: close-figs
import matplotlib.pyplot as plt
import numpy as np
import arviz as az
from pymc_marketing.mmm.transformers import scale_preserving_logistic_saturation
plt.style.use('arviz-darkgrid')
m = np.array([500, 1000, 2000, 4000, 8000])
x = np.linspace(0, 10000, 400)
ax = plt.subplot(111)
for l in m:
y = scale_preserving_logistic_saturation(x, m=l).eval()
plt.plot(x, y, label=f'm = {l}')
plt.xlabel('x', fontsize=12)
plt.ylabel('f(x)', fontsize=12)
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.show()
Parameters
----------
x : tensor
Input tensor.
m : float or array-like
Saturation parameter, characterizing the asymptote of the function value.
Returns
-------
tensor
Transformed tensor.
""" # noqa: W605
if m == 0:
return pt.zeros_like(x)
return m * (1 - pt.exp(-2 / m * x)) / (1 + pt.exp(-2 / m * x))


class TanhSaturationParameters(NamedTuple):
b: pt.TensorLike
"""Saturation.
Expand Down
2 changes: 1 addition & 1 deletion pymc_marketing/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.3.3
0.4.0
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ maintainers = [{ name = "PyMC Labs", email = "[email protected]" }]
dependencies = [
"arviz>=0.13.0",
"matplotlib>=3.5.1",
"numpy>=1.17,<1.25",
"numpy>=1.17",
"pandas",
# NOTE: Keep minimum pymc version in sync with ci.yml `OLDEST_PYMC_VERSION`
"pymc>=5.8.2",
Expand Down
101 changes: 73 additions & 28 deletions tests/clv/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import xarray
from pandas.testing import assert_frame_equal

from pymc_marketing.clv import BetaGeoModel, GammaGammaModel
from pymc_marketing.clv import BetaGeoModel, GammaGammaModel, ParetoNBDModel
from pymc_marketing.clv.utils import (
_find_first_transactions,
clv_summary,
Expand Down Expand Up @@ -45,6 +45,7 @@ def test_summary_data() -> pd.DataFrame:
rng = np.random.default_rng(14)
df = pd.read_csv("tests/clv/datasets/test_summary_data.csv", index_col=0)
df["monetary_value"] = rng.lognormal(size=(len(df)))
df["customer_id"] = df.index
return df


Expand Down Expand Up @@ -82,6 +83,35 @@ def fitted_bg(test_summary_data) -> BetaGeoModel:
return model


@pytest.fixture(scope="module")
def fitted_pnbd(test_summary_data) -> ParetoNBDModel:
rng = np.random.default_rng(45)

model_config = {
# Narrow Gaussian centered at MLE params from lifetimes ParetoNBDFitter
"r_prior": {"dist": "DiracDelta", "kwargs": {"c": 0.5534}},
"alpha_prior": {"dist": "DiracDelta", "kwargs": {"c": 10.5802}},
"s_prior": {"dist": "DiracDelta", "kwargs": {"c": 0.6061}},
"beta_prior": {"dist": "DiracDelta", "kwargs": {"c": 11.6562}},
}
pnbd_model = ParetoNBDModel(
data=test_summary_data,
model_config=model_config,
)
pnbd_model.build_model()

# Mock an idata object for tests requiring a fitted model
fake_fit = pm.sample_prior_predictive(
samples=50, model=pnbd_model.model, random_seed=rng
)
fake_fit.add_groups(dict(posterior=fake_fit.prior))
pnbd_model.idata = fake_fit
pnbd_model.set_idata_attrs(pnbd_model.idata)
pnbd_model._add_fit_data_group(pnbd_model.data)

return pnbd_model


@pytest.fixture(scope="module")
def fitted_gg(test_summary_data) -> GammaGammaModel:
rng = np.random.default_rng(40)
Expand Down Expand Up @@ -136,7 +166,7 @@ def transaction_data() -> pd.DataFrame:


class TestCustomerLifetimeValue:
def test_customer_lifetime_value_with_known_values(
def test_customer_lifetime_value_bg_with_known_values(
self, test_summary_data, fitted_bg
):
# Test borrowed from
Expand All @@ -159,6 +189,7 @@ def test_customer_lifetime_value_with_known_values(
discount_rate=0.0,
).mean(("chain", "draw"))
np.testing.assert_almost_equal(clv_d0, expected, decimal=5)

# discount_rate=1 means the clv will halve over a period
clv_d1 = (
customer_lifetime_value(
Expand Down Expand Up @@ -210,23 +241,25 @@ def test_customer_lifetime_value_with_known_values(
)
np.testing.assert_allclose(clv_t2_d1, expected / 2.0 + expected / 4.0, rtol=0.1)

def test_customer_lifetime_value_gg_with_bgf(
self, test_summary_data, fitted_gg, fitted_bg
@pytest.mark.parametrize("transaction_model", ("fitted_bg", "fitted_pnbd"))
def test_customer_lifetime_value_as_gg_method(
self, request, test_summary_data, fitted_gg, transaction_model
):
transaction_model = request.getfixturevalue(transaction_model)
t = test_summary_data.head()

ggf_clv = fitted_gg.expected_customer_lifetime_value(
transaction_model=fitted_bg,
customer_id=t.index,
transaction_model=transaction_model,
customer_id=t["customer_id"],
frequency=t["frequency"],
recency=t["recency"],
T=t["T"],
mean_transaction_value=t["monetary_value"],
)

utils_clv = customer_lifetime_value(
transaction_model=fitted_bg,
customer_id=t.index,
transaction_model=transaction_model,
customer_id=t["customer_id"],
frequency=t["frequency"],
recency=t["recency"],
T=t["T"],
Expand All @@ -238,36 +271,40 @@ def test_customer_lifetime_value_gg_with_bgf(
)
np.testing.assert_equal(ggf_clv.values, utils_clv.values)

@pytest.mark.parametrize("bg_map", (True, False))
@pytest.mark.parametrize("gg_map", (True, False))
@pytest.mark.parametrize("transaction_model_map", (True, False))
@pytest.mark.parametrize("transaction_model", ("fitted_bg", "fitted_pnbd"))
def test_map_posterior_mix_fit_types(
self, fitted_bg, fitted_gg, test_summary_data, bg_map, gg_map
self,
request,
test_summary_data,
fitted_gg,
gg_map,
transaction_model_map,
transaction_model,
):
"""Test we can mix a model that was fit with MAP and one that was fit with sample."""
transaction_model = request.getfixturevalue(transaction_model)
t = test_summary_data.head()

# Copy model with thinned chain/draw as would be obtained from MAP
if bg_map:
bg = BetaGeoModel(fitted_bg.data, model_config=fitted_bg.model_config)
bg.build_model()
bg.idata = fitted_bg.idata.sel(chain=slice(0, 1), draw=slice(0, 1))
else:
bg = fitted_bg
if transaction_model_map:
transaction_model = transaction_model._build_with_idata(
transaction_model.idata.sel(chain=slice(0, 1), draw=slice(0, 1))
)

if gg_map:
gg = GammaGammaModel(fitted_gg.data, model_config=fitted_gg.model_config)
gg.build_model()
gg.idata = fitted_gg.idata.sel(chain=slice(0, 1), draw=slice(0, 1))
else:
gg = fitted_gg
fitted_gg = fitted_gg._build_with_idata(
fitted_gg.idata.sel(chain=slice(0, 1), draw=slice(0, 1))
)

res = customer_lifetime_value(
transaction_model=bg,
customer_id=t.index,
transaction_model=transaction_model,
customer_id=t["customer_id"],
frequency=t["frequency"],
recency=t["recency"],
T=t["T"],
monetary_value=gg.expected_customer_spend(
monetary_value=fitted_gg.expected_customer_spend(
t.index,
mean_transaction_value=t["monetary_value"],
frequency=t["frequency"],
Expand All @@ -276,11 +313,19 @@ def test_map_posterior_mix_fit_types(

assert res.dims == ("chain", "draw", "customer_id")

def test_clv_after_thinning(self, test_summary_data, fitted_gg, fitted_bg):
@pytest.mark.parametrize("transaction_model", ("fitted_bg", "fitted_pnbd"))
def test_clv_after_thinning(
self,
request,
test_summary_data,
fitted_gg,
transaction_model,
):
transaction_model = request.getfixturevalue(transaction_model)
t = test_summary_data.head()

ggf_clv = fitted_gg.expected_customer_lifetime_value(
transaction_model=fitted_bg,
transaction_model=transaction_model,
customer_id=t.index,
frequency=t["frequency"],
recency=t["recency"],
Expand All @@ -289,9 +334,9 @@ def test_clv_after_thinning(self, test_summary_data, fitted_gg, fitted_bg):
)

fitted_gg_thinned = fitted_gg.thin_fit_result(keep_every=10)
fitted_bg_thinned = fitted_bg.thin_fit_result(keep_every=10)
transaction_model_thinned = transaction_model.thin_fit_result(keep_every=10)
ggf_clv_thinned = fitted_gg_thinned.expected_customer_lifetime_value(
transaction_model=fitted_bg_thinned,
transaction_model=transaction_model_thinned,
customer_id=t.index,
frequency=t["frequency"],
recency=t["recency"],
Expand Down
14 changes: 7 additions & 7 deletions tests/mmm/test_delayed_saturated_mmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
rng: np.random.Generator = np.random.default_rng(seed=seed)


@pytest.fixture(scope="class")
@pytest.fixture(scope="module")
def generate_data():
def _generate_data(date_data: pd.DatetimeIndex) -> pd.DataFrame:
n: int = date_data.size
Expand All @@ -37,7 +37,7 @@ def _generate_data(date_data: pd.DatetimeIndex) -> pd.DataFrame:
return _generate_data


@pytest.fixture(scope="class")
@pytest.fixture(scope="module")
def toy_X(generate_data) -> pd.DataFrame:
date_data: pd.DatetimeIndex = pd.date_range(
start="2019-06-01", end="2021-12-31", freq="W-MON"
Expand Down Expand Up @@ -80,12 +80,12 @@ def model_config_requiring_serialization() -> Dict:
return model_config


@pytest.fixture(scope="class")
@pytest.fixture(scope="module")
def toy_y(toy_X: pd.DataFrame) -> pd.Series:
return pd.Series(data=rng.integers(low=0, high=100, size=toy_X.shape[0]))


@pytest.fixture(scope="class")
@pytest.fixture(scope="module")
def mmm() -> DelayedSaturatedMMM:
return DelayedSaturatedMMM(
date_column="date",
Expand All @@ -95,7 +95,7 @@ def mmm() -> DelayedSaturatedMMM:
)


@pytest.fixture(scope="class")
@pytest.fixture(scope="module")
def mmm_with_fourier_features() -> DelayedSaturatedMMM:
return DelayedSaturatedMMM(
date_column="date",
Expand All @@ -106,15 +106,15 @@ def mmm_with_fourier_features() -> DelayedSaturatedMMM:
)


@pytest.fixture(scope="class")
@pytest.fixture(scope="module")
def mmm_fitted(
mmm: DelayedSaturatedMMM, toy_X: pd.DataFrame, toy_y: pd.Series
) -> DelayedSaturatedMMM:
mmm.fit(X=toy_X, y=toy_y, target_accept=0.8, draws=3, chains=2)
return mmm


@pytest.fixture(scope="class")
@pytest.fixture(scope="module")
def mmm_fitted_with_fourier_features(
mmm_with_fourier_features: DelayedSaturatedMMM,
toy_X: pd.DataFrame,
Expand Down
Loading

0 comments on commit a63b0bd

Please sign in to comment.