pymc-labs · twiecki · Oct 2, 2023 · Jul 25, 2023 · Jul 25, 2023 · Jul 25, 2023
diff --git a/pymc_marketing/mmm/base.py b/pymc_marketing/mmm/base.py
@@ -19,6 +19,8 @@
 from sklearn.preprocessing import FunctionTransformer
 from xarray import DataArray, Dataset
 
+from pymc_marketing.mmm.budget_optimizer import budget_allocator
+from pymc_marketing.mmm.utils import estimate_menten_parameters, michaelis_menten
 from pymc_marketing.mmm.validating import (
     ValidateChannelColumns,
     ValidateDateColumn,
@@ -472,8 +474,105 @@ def compute_channel_contribution_original_scale(self) -> DataArray:
             coords=channel_contribution.coords,
         )
 
-    def plot_direct_contribution_curves(self) -> plt.Figure:
-        """Plots the direct contribution curves. The term "direct" refers to the fact
+    def _plot_estimations(self, x: np.ndarray, channel: str, i: int) -> plt.Figure:
+        """
+        Plot the fitted Michaelis-Menten response curve for a single channel.
+
+        Parameters
+        ----------
+        x : np.ndarray
+            Observed spend values for the channel. Only their maximum is used,
+            when computing the upper end of the plotted range.
+        channel : str
+            Name of the channel whose curve is fitted and drawn.
+        i : int
+            Index used to pick the matplotlib cycle color ``C{i}``.
+
+        Returns
+        -------
+        plt.Figure
+            A new figure holding the fitted curve and a marker at ``x = k``.
+        """
+        channel_contributions = self.compute_channel_contribution_original_scale().mean(
+            ["chain", "draw"]
+        )
+
+        fig_estimations, ax_estimations = plt.subplots(figsize=(8, 6))
+
+        L, k = estimate_menten_parameters(channel, self.X, channel_contributions)
+        # Spend at which the curve reaches 99% of L:
+        # L * x / (k + x) = 0.99 * L  =>  x = k * 0.99 / 0.01.
+        plateau_x = k * (0.99 * L / (L * 0.01))
+        # At x = k the Michaelis-Menten curve sits at half of its plateau (L / 2).
+        elbow_y = michaelis_menten(k, L, k)
+
+        # NOTE(review): the upper end is <= 0 whenever plateau_x <= 2 * max(x);
+        # confirm this is the intended plotting range.
+        x_fit = np.linspace(0, plateau_x - (max(x) * 2), 1000)
+        y_fit = michaelis_menten(x_fit, L, k)
+
+        ax_estimations.plot(x_fit, y_fit, color=f"C{i}", label="Fit Curve", alpha=0.6)
+
+        # Marker-only format string: the color comes from the ``color`` keyword.
+        # Passing "go" as well defines the color twice and makes matplotlib warn.
+        ax_estimations.plot(
+            k,
+            elbow_y,
+            "o",
+            color=f"C{i}",
+            markerfacecolor="white",
+        )
+
+        ax_estimations.set(xlabel="Spent", ylabel="Contribution")
+        ax_estimations.legend()
+
+        return fig_estimations
+
+    def budget_allocation(
+        self,
+        total_budget: int,
+        parameters: Optional[Dict[str, Tuple[float, float]]],
+        budget_bounds: Optional[Dict[str, Tuple[float, float]]],
+    ) -> pd.DataFrame:
+        """
+        Allocate a total budget among the model's channels.
+
+        Validates the inputs and delegates to ``budget_allocator`` using
+        ``self.channel_columns`` as the list of channels.
+
+        Parameters
+        ----------
+        total_budget : int or float, required
+            The total budget available for allocation.
+        parameters : dict, required
+            Mapping from channel name to its Michaelis-Menten parameters
+            ``(L, k)``.
+        budget_bounds : dict, optional
+            Mapping from channel name to its ``(lower, upper)`` budget bounds.
+            ``None`` is forwarded unchanged to ``budget_allocator``.
+
+        Returns
+        -------
+        pd.DataFrame
+            The DataFrame returned by ``budget_allocator``.
+
+        Raises
+        ------
+        ValueError
+            If ``budget_bounds`` is neither a dictionary nor ``None``, if
+            ``total_budget`` is not an int or float, or if ``parameters`` is not
+            a dictionary.
+        """
+        # ``None`` is allowed: the annotation is Optional and the optimizer
+        # has its own fallback for missing bounds.
+        if budget_bounds is not None and not isinstance(budget_bounds, dict):
+            raise ValueError("The 'budget_bounds' parameter must be a dictionary.")
+
+        if not isinstance(total_budget, (int, float)):
+            raise ValueError(
+                "The 'total_budget' parameter must be an integer or float."
+            )
+
+        # Every channel's (L, k) pair is looked up downstream, so fail early
+        # with a clear message instead of an opaque error inside the optimizer.
+        if not isinstance(parameters, dict):
+            raise ValueError("The 'parameters' parameter must be a dictionary.")
+
+        return budget_allocator(
+            total_budget=total_budget,
+            channels=self.channel_columns,
+            parameters=parameters,
+            budget_ranges=budget_bounds,
+        )
+
+    def compute_channel_estimate_points_original_scale(self) -> Dict:
+        """
+        Fit Michaelis-Menten parameters for every channel.
+
+        Returns
+        -------
+        Dict
+            Maps each channel in ``self.channel_columns`` to the value returned
+            by ``estimate_menten_parameters`` for that channel.
+        """
+        # Average the original-scale contributions over the posterior samples.
+        mean_contributions = (
+            self.compute_channel_contribution_original_scale().mean(["chain", "draw"])
+        )
+
+        return {
+            channel: estimate_menten_parameters(channel, self.X, mean_contributions)
+            for channel in self.channel_columns
+        }
+
+    def plot_direct_contribution_curves(
+        self, show_estimations: bool = False
+    ) -> plt.Figure:
+        """
+        Plots the direct contribution curves. The term "direct" refers to the fact
         we plots costs vs immediate returns and we do not take into account the lagged
         effects of the channels e.g. adstock transformations.
 
@@ -485,6 +584,7 @@ def plot_direct_contribution_curves(self) -> plt.Figure:
         channel_contributions = self.compute_channel_contribution_original_scale().mean(
             ["chain", "draw"]
         )
+
         fig, axes = plt.subplots(
             nrows=self.n_channel,
             ncols=1,
@@ -496,24 +596,27 @@ def plot_direct_contribution_curves(self) -> plt.Figure:
 
         for i, channel in enumerate(self.channel_columns):
             ax = axes[i]
+
             if self.X is not None:
-                sns.regplot(
-                    x=self.X[self.channel_columns].to_numpy()[:, i],
-                    y=channel_contributions.sel(channel=channel),
-                    color=f"C{i}",
-                    order=2,
-                    ci=None,
-                    line_kws={
-                        "linestyle": "--",
-                        "alpha": 0.5,
-                        "label": "quadratic fit",
-                    },
-                    ax=ax,
-                )
-            ax.legend(loc="upper left")
-            ax.set(title=f"{channel}", xlabel="total_cost_eur")
+                x = self.X[self.channel_columns].to_numpy()[:, i]
+                y = channel_contributions.sel(channel=channel).to_numpy()
+
+                ax.scatter(x, y, label=f"{channel}", color=f"C{i}")
+
+                if show_estimations:
+                    fig_estimations = self._plot_estimations(x, channel, i)
+                    fig.append(fig_estimations)
+
+            ax.legend(
+                loc="upper left",
+                facecolor="white",
+                title=f"{channel} Legend",
+                fontsize="small",
+            )
+
+            ax.set(xlabel="Spent", ylabel="Contribution")
 
-        fig.suptitle("Contribution Plots", fontsize=16)
+        fig.suptitle("Direct response curves", fontsize=16)
         return fig
 
     def compute_mean_contributions_over_time(

diff --git a/pymc_marketing/mmm/budget_optimizer.py b/pymc_marketing/mmm/budget_optimizer.py
@@ -0,0 +1,111 @@
+# budget_optimizer.py
+from typing import Dict, List, Optional, Tuple, Union
+
+import numpy as np
+from pandas import DataFrame
+from scipy.optimize import minimize
+
+from pymc_marketing.mmm.utils import michaelis_menten
+
+
+def calculate_expected_contribution(parameters, optimal_budget):
+    """
+    Evaluate each channel's Michaelis-Menten contribution at its allocated budget.
+
+    Parameters
+    ----------
+    parameters : dict
+        Maps each channel to its ``(L, k)`` pair.
+    optimal_budget : dict
+        Maps each channel to the budget assigned to it.
+
+    Returns
+    -------
+    dict
+        Per-channel contributions keyed by channel, plus a ``"total"`` entry
+        holding their sum.
+    """
+    contributions = {}
+    for channel in optimal_budget:
+        ceiling, half_point = parameters[channel]
+        contributions[channel] = michaelis_menten(
+            optimal_budget[channel], ceiling, half_point
+        )
+
+    # Sum before inserting the aggregate so "total" is not counted in itself.
+    contributions["total"] = sum(contributions.values())
+    return contributions
+
+
+def objective_distribution(x, channels, parameters):
+    """
+    Return the negated total contribution of a budget distribution.
+
+    Parameters
+    ----------
+    x : list of float
+        The budget distribution across channels, in the same order as
+        ``channels``.
+    channels : sequence of str
+        Channel names, paired positionally with the entries of ``x``.
+    parameters : dict
+        Maps each channel to its ``(L, k)`` pair.
+
+    Returns
+    -------
+    float
+        Minus the sum of the per-channel Michaelis-Menten contributions.
+    """
+    per_channel = []
+    for channel, budget in zip(channels, x):
+        ceiling, half_point = parameters[channel]
+        per_channel.append(michaelis_menten(budget, ceiling, half_point))
+
+    return -1 * sum(per_channel)
+
+
+def optimize_budget_distribution(total_budget, budget_ranges, parameters, channels):
+    """
+    Find the budget split that minimizes ``objective_distribution``.
+
+    The split is constrained to sum to ``total_budget`` and to respect the
+    per-channel bounds, and is solved with SciPy's SLSQP method.
+
+    Parameters
+    ----------
+    total_budget : int or float
+        Total budget that the per-channel budgets must add up to.
+    budget_ranges : dict or None
+        Maps each channel to its ``(lower, upper)`` bounds. When ``None``, each
+        channel is bounded by ``[0, min(total_budget, L)]`` where ``L`` is the
+        first entry of that channel's parameters.
+    parameters : dict
+        Maps each channel to its ``(L, k)`` pair.
+    channels : sequence of str
+        Channel names; fixes the order of the optimization variables.
+
+    Returns
+    -------
+    dict
+        A dictionary with channels as keys and the optimal budget for each
+        channel as values.
+
+    Warns
+    -----
+    RuntimeWarning
+        If the optimizer reports that it did not terminate successfully. The
+        last iterate is still returned.
+    """
+    import warnings
+
+    if budget_ranges is None:
+        budget_ranges = {
+            channel: [0, min(total_budget, parameters[channel][0])]
+            for channel in channels
+        }
+
+    # Start from an even split across channels.
+    initial_guess = [total_budget / len(channels)] * len(channels)
+
+    bounds = [budget_ranges[channel] for channel in channels]
+
+    # Equality constraint: the whole budget must be spent.
+    constraints = {"type": "eq", "fun": lambda x: np.sum(x) - total_budget}
+
+    result = minimize(
+        objective_distribution,
+        initial_guess,
+        args=(channels, parameters),
+        method="SLSQP",
+        bounds=bounds,
+        constraints=constraints,
+    )
+
+    # Surface solver failures (e.g. infeasible bounds) instead of silently
+    # returning a result the optimizer itself flagged as unsuccessful.
+    if not result.success:
+        warnings.warn(
+            f"Budget optimization did not converge: {result.message}",
+            RuntimeWarning,
+            stacklevel=2,
+        )
+
+    return dict(zip(channels, result.x))
+
+
+def budget_allocator(
+    total_budget: int = 1000,
+    channels: Union[List[str], Tuple[str]] = (),
+    parameters: Optional[Dict[str, Tuple[float, float]]] = None,
+    budget_ranges: Optional[Dict[str, Tuple[float, float]]] = None,
+) -> DataFrame:
+    """
+    Compute the optimal budget split and the contribution it is expected to yield.
+
+    Parameters
+    ----------
+    total_budget : int
+        Total budget to distribute across ``channels``.
+    channels : list or tuple of str
+        Names of the channels to allocate budget to.
+    parameters : dict, optional
+        Maps each channel to its Michaelis-Menten ``(L, k)`` pair. ``None`` is
+        treated as an empty dictionary.
+    budget_ranges : dict, optional
+        Maps each channel to its ``(lower, upper)`` budget bounds. ``None`` is
+        forwarded so that ``optimize_budget_distribution`` derives the bounds.
+
+    Returns
+    -------
+    DataFrame
+        Built from two dictionaries: ``"estimated_contribution"`` (one entry per
+        channel plus a ``"total"`` entry) and ``"optimal_budget"`` (one entry
+        per channel).
+    """
+    if parameters is None:
+        parameters = {}
+
+    # Keyword arguments on purpose: the helpers take these values in a
+    # different positional order than this function does.
+    optimal_budget = optimize_budget_distribution(
+        total_budget=total_budget,
+        budget_ranges=budget_ranges,
+        parameters=parameters,
+        channels=channels,
+    )
+
+    return DataFrame(
+        {
+            "estimated_contribution": calculate_expected_contribution(
+                parameters=parameters, optimal_budget=optimal_budget
+            ),
+            "optimal_budget": optimal_budget,
+        }
+    )
diff --git a/pymc_marketing/mmm/utils.py b/pymc_marketing/mmm/utils.py
@@ -1,6 +1,9 @@
+from typing import List
+
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
+from scipy.optimize import curve_fit
 
 
 def generate_fourier_modes(
@@ -33,3 +36,43 @@ def generate_fourier_modes(
             for func in ("sin", "cos")
         }
     )
+
+
+def michaelis_menten(x, L, k) -> float:
+    """
+    Evaluate the Michaelis-Menten curve ``L * x / (k + x)``.
+
+    Parameters
+    ----------
+    x : float
+        The spend on a channel.
+    L : float
+        The plateau of the curve, i.e. the maximum contribution a channel can
+        make.
+    k : float
+        The spend at which the curve reaches half of ``L`` (its elbow).
+
+    Returns
+    -------
+    float
+        The value of the curve at ``x``.
+    """
+    numerator = L * x
+    denominator = k + x
+    return numerator / denominator
+
+
+def estimate_menten_parameters(
+    channel: str,
+    original_dataframe,
+    contributions,
+) -> List[float]:
+    """
+    Fit the Michaelis-Menten parameters ``(L, k)`` for one channel.
+
+    Parameters
+    ----------
+    channel : str
+        Name of the channel to fit; used both as the column of
+        ``original_dataframe`` and as the ``channel`` label in ``contributions``.
+    original_dataframe
+        Table holding the spend per channel; ``original_dataframe[channel]``
+        must support ``.to_numpy()`` (presumably a pandas DataFrame).
+    contributions
+        Labelled array of contributions supporting ``.sel(channel=...)``
+        (presumably an xarray DataArray). If it has a ``quantile`` dimension,
+        the 0.5 quantile is used.
+
+    Returns
+    -------
+    List[float]
+        The two fitted values ``[L, k]``, returned as the ``popt`` array
+        produced by ``scipy.optimize.curve_fit``.
+    """
+    x = original_dataframe[channel].to_numpy()
+
+    # Contributions already averaged over chain/draw carry no "quantile"
+    # dimension; only pick the median when that dimension exists.
+    if "quantile" in getattr(contributions, "dims", ()):
+        contributions = contributions.sel(quantile=0.5)
+    y = contributions.sel(channel=channel).to_numpy()
+
+    # Initial guess: plateau at the largest observed contribution, tiny k.
+    initial_guess = [max(y), 0.001]
+    popt, _ = curve_fit(michaelis_menten, x, y, p0=initial_guess)
+
+    return popt
diff --git a/tests/mmm/test_budget_optimizer.py b/tests/mmm/test_budget_optimizer.py
@@ -0,0 +1,52 @@
+import pandas as pd
+import pytest
+
+from pymc_marketing.mmm.budget_optimizer import budget_allocator
+
+
+@pytest.mark.parametrize(
+    "total_budget, channels, parameters, budget_ranges, expected",
+    [
+        (
+            # Two channels with identical (L, k) curves: by symmetry the
+            # optimum is an even split of the budget.
+            1000,
+            ["channel1", "channel2"],
+            {"channel1": (100.0, 50.0), "channel2": (100.0, 50.0)},
+            {"channel1": (0, 1000), "channel2": (0, 1000)},
+            pd.DataFrame(
+                {
+                    "estimated_contribution": {
+                        "channel1": 100.0 * 500.0 / 550.0,
+                        "channel2": 100.0 * 500.0 / 550.0,
+                        "total": 2 * 100.0 * 500.0 / 550.0,
+                    },
+                    "optimal_budget": {"channel1": 500.0, "channel2": 500.0},
+                }
+            ),
+        ),
+        (
+            # Equal marginal returns L*k/(k+x)^2 across channels, together
+            # with x1 + x2 + x3 = 2000, give x1 = 360 and x2 = x3 = 820.
+            2000,
+            ["channel1", "channel2", "channel3"],
+            {
+                "channel1": (100.0, 100.0),
+                "channel2": (400.0, 100.0),
+                "channel3": (400.0, 100.0),
+            },
+            {"channel1": (0, 1000), "channel2": (0, 1000), "channel3": (0, 1000)},
+            pd.DataFrame(
+                {
+                    "estimated_contribution": {
+                        "channel1": 100.0 * 360.0 / 460.0,
+                        "channel2": 400.0 * 820.0 / 920.0,
+                        "channel3": 400.0 * 820.0 / 920.0,
+                        "total": 100.0 * 360.0 / 460.0
+                        + 2 * 400.0 * 820.0 / 920.0,
+                    },
+                    "optimal_budget": {
+                        "channel1": 360.0,
+                        "channel2": 820.0,
+                        "channel3": 820.0,
+                    },
+                }
+            ),
+        ),
+    ],
+)
+def test_budget_allocator(
+    total_budget, channels, parameters, budget_ranges, expected
+):
+    """Check the allocator against analytically derived optimal allocations."""
+    result = budget_allocator(
+        total_budget=total_budget,
+        channels=channels,
+        parameters=parameters,
+        budget_ranges=budget_ranges,
+    )
+    # The optimizer is iterative, so compare with a tolerance rather than exactly.
+    pd.testing.assert_frame_equal(result, expected, check_exact=False, rtol=1e-2)