Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add baselined saturation #498

Merged
merged 14 commits into from
Jan 26, 2024
199 changes: 197 additions & 2 deletions pymc_marketing/mmm/transformers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from enum import Enum
from typing import Union
from typing import NamedTuple, Union

import numpy as np
import numpy.typing as npt
Expand Down Expand Up @@ -302,7 +302,50 @@ def logistic_saturation(x, lam: Union[npt.NDArray[np.float_], float] = 0.5):
return (1 - pt.exp(-lam * x)) / (1 + pt.exp(-lam * x))


def tanh_saturation(x, b: float = 0.5, c: float = 0.5):
class TanhSaturationParameters(NamedTuple):
b: pt.TensorLike
"""Saturation.
"""
c: pt.TensorLike
"""Customer Aquisition Cost at 0.
"""

def baseline(self, x0: pt.TensorLike) -> "TanhSaturationBaselinedParameters":
"""Change the parameterization to baselined at :math:`x_0`."""
y_ref = tanh_saturation(x0, self.b, self.c)
gain_ref = y_ref / x0
r_ref = y_ref / self.b
return TanhSaturationBaselinedParameters(x0, gain_ref, r_ref)


class TanhSaturationBaselinedParameters(NamedTuple):
x0: pt.TensorLike
"""Baseline spend.
"""
gain: pt.TensorLike
"""ROAS at :math:`x_0`.
"""
r: pt.TensorLike
"""Overspend Fraction.
"""

def debaseline(self) -> TanhSaturationParameters:
"""Change the parameterization to baselined to be classic saturation and cac."""
saturation = (self.gain * self.x0) / self.r
cac = self.r / (self.gain * pt.arctanh(self.r))
return TanhSaturationParameters(saturation, cac)

def rebaseline(self, x1: pt.TensorLike) -> "TanhSaturationBaselinedParameters":
"""Change the parameterization to baselined at :math:`x_1`."""
params = self.debaseline()
return params.baseline(x1)


def tanh_saturation(
x: pt.TensorLike,
b: pt.TensorLike = 0.5,
c: pt.TensorLike = 0.5,
) -> pt.TensorVariable:
R"""Tanh saturation transformation.

.. math::
Expand Down Expand Up @@ -355,3 +398,155 @@ def tanh_saturation(x, b: float = 0.5, c: float = 0.5):
See https://www.pymc-labs.io/blog-posts/reducing-customer-acquisition-costs-how-we-helped-optimizing-hellofreshs-marketing-budget/ # noqa: E501
"""
return b * pt.tanh(x / (b * c))


def tanh_saturation_baselined(
x: pt.TensorLike,
x0: pt.TensorLike,
gain: pt.TensorLike = 0.5,
r: pt.TensorLike = 0.5,
) -> pt.TensorVariable:
r"""
Baselined Tanh Saturation.

This parameterization that is easier than :func:`tanh_saturation`
to use for industry applications where domain knowledge is an essence.

In a nutshell, it is an alternative parameterization of the reach function is given by:

.. math::

\begin{align}
c_0 &= \frac{r}{g \cdot \arctan(r)} \\
\beta &= \frac{g \cdot x_0}{r} \\
\operatorname{saturation}(x, \beta, c_0) &= \beta \cdot \tanh \left( \frac{x}{c_0 \cdot \beta} \right)
\end{align}

where:

- :math:`x_0` is the "reference point". This is a point chosen
by the user (not given a prior) where they expect most of their data to lie.
For example, if you're spending between 50 and 150 dollars on a particular channel,
you might choose :math:`x_0 = 100`.
Suggested value is median channel spend: ``np.median(spend)``.

- :math:`g` is the "gain", which is the value of the CAC (:math:`c_0`) at the reference point.
You have to set a prior on what you think the CAC is when you spend :math:`x_0 = 100`.
Imagine you have four advertising channels, and you acquired 1000 new users.
If each channel performed equally well, and advertising drove all sales, you might expect
that you gained 250 users from each channel. Here, your "gain" would be :math:`250 / 100 = 2.5`.
Suggested prior is ``pm.Exponential``
- :math:`r`, the overspend fraction is telling you where the reference point is.

- :math:`0` - we can increase our budget by a lot to reach the saturated region,
the diminishing returns are not visible yet.
- :math:`1` - the reference point is already in the saturation region
and additional dollar spend will not lead to any new users.
- :math:`0.8`, you can still increase acquired users by :math:`50\%` as much
you get in the reference point by increasing the budget.
:math:`x_0` effect is 20% away from saturation point

Suggested prior is ``pm.Beta``

.. note::

The reference point :math:`x_0` has to be set within the range of the actual spends.
As in, you buy ads three times and spend :math:`5`, :math:`6` and :math:`7` dollars,
:math:`x_0` has to be set within :math:`[5, 7]`, so not :math:`4` not :math:`8`.
Otherwise the posterior of r and gain becomes a skinny diagonal line.
It could be very relevant if there is very little spend observations for a particular channel.

The original reach or saturation function used in an MMM is formulated as

.. math::

\operatorname{saturation}(x, \beta, c_0) = \beta \cdot \tanh \left( \frac{x}{c_0 \cdot \beta} \right)

where:

- :math:`\beta` is the saturation, or the limit of the total number
of new users obtained when an infinite number of dollars are spent on that channel.
- :math:`c_0` is the cost per acquisition (CAC0), so the initial cost per new user.
- :math:`\frac{1}{c_0}` is the inverse of the CAC0, so it's the number of new
users we might expect after spending our first dollar.

.. plot::
:context: close-figs

import matplotlib.pyplot as plt
import numpy as np
import arviz as az
from pymc_marketing.mmm.transformers import (
tanh_saturation_baselined,
tanh_saturation,
TanhSaturationBaselinedParameters,
)

gain = 1
overspend_fraction = 0.7
x_baseline = 400

params = TanhSaturationBaselinedParameters(x_baseline, gain, overspend_fraction)

x = np.linspace(0, 1000)
y = tanh_saturation_baselined(x, *params).eval()

saturation, cac0 = params.debaseline()
cac0 = cac0.eval()
saturated_ref = tanh_saturation(x_baseline, saturation, cac0).eval()

plt.plot(x, y);
plt.axvline(x_baseline, linestyle="dashed", color="red", label="baseline")
plt.plot(x, x * gain, linestyle="dashed", label="gain (slope)");
plt.axhline(saturated_ref, linestyle="dashed", label="f(reference)")
plt.plot(x, x / cac0, linestyle="dotted", label="1/cac (slope)");
plt.axhline(saturation, linestyle="dotted", label="saturation")
plt.fill_between(x, saturated_ref, saturation, alpha=0.1, label="underspend fraction")
plt.fill_between(x, saturated_ref, alpha=0.1, label="overspend fraction")
plt.legend()
plt.show()

Examples
--------

.. code-block:: python

import pymc as pm
import numpy as np

x_in = np.exp(3+np.random.randn(100))
true_cac = 1
true_saturation = 100
y_out = abs(np.random.normal(tanh_saturation(x_in, true_saturation, true_cac).eval(), 0.1))

with pm.Model() as model_reparam:
r = pm.Uniform("r")
gain = pm.Exponential("gain", 1)
input = pm.ConstantData("spent", x_in)
response = pm.ConstantData("response", y_out)
sigma = pm.HalfNormal("n")
output = tanh_saturation_baselined(input, np.median(x_in), gain, r)
pm.Normal("output", output, sigma, observed=response)
trace = pm.sample()

Parameters
----------
x : tensor
Input tensor.
x0: tensor
Baseline for saturation.
gain : tensor, by default 0.5
ROAS at the baseline point, mathematically as :math:`gain = f(x0) / x0`.
r : tensor, by default 0.5
The overspend fraction, mathematically as :math:`r = f(x0) / \text{saturation}`.

Returns
-------
tensor
Transformed tensor.

References
----------
Developed by Max Kochurov and Aziz Al-Maeeni doing innovative work in `PyMC Labs <pymc-labs.com>`_.
"""
return gain * x0 * pt.tanh(x * pt.arctanh(r) / x0) / r
59 changes: 58 additions & 1 deletion tests/mmm/test_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@
import pytensor
import pytensor.tensor as pt
import pytest
from pytensor.tensor.var import TensorVariable
from pytensor.tensor.variable import TensorVariable

from pymc_marketing.mmm.transformers import (
ConvMode,
TanhSaturationParameters,
batched_convolution,
delayed_adstock,
geometric_adstock,
logistic_saturation,
tanh_saturation,
tanh_saturation_baselined,
)


Expand Down Expand Up @@ -236,6 +238,61 @@ def test_tanh_saturation_inverse(self, x, b, c):
y_inv = (b * c) * pt.arctanh(y / b)
np.testing.assert_array_almost_equal(x=x, y=y_inv.eval(), decimal=6)

@pytest.mark.parametrize(
"x, x0, gain, r",
[
(np.ones(shape=(100)), 10, 0.5, 0.5),
(np.zeros(shape=(100)), 10, 0.6, 0.3),
(np.linspace(start=0.0, stop=100.0, num=50), 10, 0.001, 0.01),
(np.linspace(start=0.0, stop=100.0, num=50), 10, 0.1, 0.01),
(np.linspace(start=0.0, stop=100.0, num=50), 10, 1, 0.25),
],
)
def test_tanh_saturation_baselined_range(self, x, x0, gain, r):
b = (gain * x0) / r
assert tanh_saturation_baselined(x=x, x0=x0, gain=gain, r=r).eval().max() <= b
assert tanh_saturation_baselined(x=x, x0=x0, gain=gain, r=r).eval().min() >= -b

@pytest.mark.parametrize(
"x, x0, gain, r",
[
(np.ones(shape=(100)), 10, 0.5, 0.5),
(np.zeros(shape=(100)), 10, 0.6, 0.3),
(np.linspace(start=0.0, stop=100.0, num=50), 10, 0.001, 0.1),
(np.linspace(start=0.0, stop=100.0, num=50), 10, 0.1, 0.01),
(np.linspace(start=0.0, stop=100.0, num=50), 10, 1, 0.25),
],
)
def test_tanh_saturation_baselined_inverse(self, x, x0, gain, r):
y = tanh_saturation_baselined(x=x, x0=x0, gain=gain, r=r)
b = (gain * x0) / r
c = r / (gain * pt.arctanh(r))
y_inv = (b * c) * pt.arctanh(y / b)
np.testing.assert_array_almost_equal(x=x, y=y_inv.eval(), decimal=6)

@pytest.mark.parametrize(
"x, b, c",
[
(np.linspace(start=0.0, stop=10.0, num=50), 20, 0.5),
(np.linspace(start=0.0, stop=10.0, num=50), 100, 0.5),
(np.linspace(start=0.0, stop=10.0, num=50), 100, 1),
],
)
def test_tanh_saturation_parameterization_transformation(self, x, b, c):
param_classic = TanhSaturationParameters(b, c)
param_x0 = param_classic.baseline(5)
param_x1 = param_x0.rebaseline(6)
param_classic1 = param_x1.debaseline()
y1 = tanh_saturation(x, *param_classic).eval()
y2 = tanh_saturation_baselined(x, *param_x0).eval()
y3 = tanh_saturation_baselined(x, *param_x1).eval()
y4 = tanh_saturation(x, *param_classic1).eval()
np.testing.assert_allclose(y1, y2)
np.testing.assert_allclose(y2, y3)
np.testing.assert_allclose(y3, y4)
np.testing.assert_allclose(param_classic1.b.eval(), b)
np.testing.assert_allclose(param_classic1.c.eval(), c)


class TestTransformersComposition:
@pytest.mark.parametrize(
Expand Down
Loading