Solving optimizer issues & typos (#933)

* Correcting typo num_days by horizon * Correcting typo num_days by horizon and scaler * Running notebooks * Update UML Diagrams * Rename horizon by periods * Adding test requested to check budget outputs * Running notebooks * Update UML Diagrams * Small notebook missing change. * Correction in tests * Change on name * running notebook modifying function * Update UML Diagrams
pymc-labs · Sep 10, 2024 · f5755cd · f5755cd
1 parent b528016
commit f5755cd
Show file tree

Hide file tree

Showing 8 changed files with 1,606 additions and 1,464 deletions.
diff --git a/docs/source/notebooks/mmm/mmm_budget_allocation_example.ipynb b/docs/source/notebooks/mmm/mmm_budget_allocation_example.ipynb
diff --git a/docs/source/notebooks/mmm/mmm_example.ipynb b/docs/source/notebooks/mmm/mmm_example.ipynb
diff --git a/docs/source/notebooks/mmm/model.nc b/docs/source/notebooks/mmm/model.nc
diff --git a/docs/source/uml/classes_mmm.png b/docs/source/uml/classes_mmm.png
diff --git a/pymc_marketing/mmm/budget_optimizer.py b/pymc_marketing/mmm/budget_optimizer.py
@@ -50,8 +50,8 @@ class BudgetOptimizer(BaseModel):
         The adstock class.
     saturation : SaturationTransformation
         The saturation class.
-    num_days : int
-        The number of days.
+    num_periods : int
+        The number of time units.
     parameters : dict
         A dictionary of parameters for each channel.
     adstock_first : bool, optional
@@ -65,10 +65,17 @@ class BudgetOptimizer(BaseModel):
     saturation: SaturationTransformation = Field(
         ..., description="The saturation transformation class."
     )
-    num_days: int = Field(..., gt=0, description="The number of days.")
+    num_periods: int = Field(
+        ...,
+        gt=0,
+        description="The number of time units at time granularity which the budget is to be allocated.",
+    )
     parameters: dict[str, dict[str, dict[str, float]]] = Field(
         ..., description="A dictionary of parameters for each channel."
     )
+    scales: np.ndarray = Field(
+        ..., description="The scale parameter for each channel variable"
+    )
     adstock_first: bool = Field(
         True,
         description="Whether to apply adstock transformation first or saturation transformation first.",
@@ -97,7 +104,7 @@ def objective(self, budgets: list[float]) -> float:
             else (self.saturation, self.adstock)
         )
         for idx, (_channel, params) in enumerate(self.parameters.items()):
-            budget = budgets[idx]
+            budget = budgets[idx] / self.scales[idx]
             first_params = (
                 params["adstock_params"]
                 if self.adstock_first
@@ -108,7 +115,7 @@ def objective(self, budgets: list[float]) -> float:
                 if self.adstock_first
                 else params["adstock_params"]
             )
-            spend = np.full(self.num_days, budget)
+            spend = np.full(self.num_periods, budget)
             spend_extended = np.concatenate([spend, np.zeros(self.adstock.l_max)])
             transformed_spend = second_transform.function(
                 x=first_transform.function(x=spend_extended, **first_params),

diff --git a/pymc_marketing/mmm/delayed_saturated_mmm.py b/pymc_marketing/mmm/delayed_saturated_mmm.py
@@ -1990,6 +1990,7 @@ def _create_synth_dataset(
         time_granularity: str,
         time_length: int,
         lag: int,
+        noise_level: float = 0.01,
     ) -> pd.DataFrame:
         """
         Create a synthetic dataset based on the given allocation strategy (Budget) and time granularity.
@@ -2014,6 +2015,8 @@ def _create_synth_dataset(
             The length of the synthetic dataset in terms of the time granularity.
         lag : int
             The lag value (not used in this function).
+        noise_level : float
+            The level of noise added to the allocation strategy (by default 1%).
 
         Returns
         -------
@@ -2063,7 +2066,9 @@ def _create_synth_dataset(
                 self.date_column: pd.to_datetime(new_date),
                 **{
                     channel: allocation_strategy.get(channel, 0)
-                    + np.random.normal(0, 0.1 * allocation_strategy.get(channel, 0))
+                    + np.random.normal(
+                        0, noise_level * allocation_strategy.get(channel, 0)
+                    )
                     for channel in channels
                 },
                 **{control: 0 for control in _controls},
@@ -2078,10 +2083,11 @@ def allocate_budget_to_maximize_response(
         self,
         budget: float | int,
         time_granularity: str,
-        num_days: int,
-        budget_bounds: dict[str, list[Any]] | None = None,
+        num_periods: int,
+        budget_bounds: dict[str, tuple[float, float]] | None = None,
         custom_constraints: dict[str, float] | None = None,
         quantile: float = 0.5,
+        noise_level: float = 0.01,
     ) -> az.InferenceData:
         """
         Allocate the given budget to maximize the response over a specified time period.
@@ -2101,9 +2107,9 @@ def allocate_budget_to_maximize_response(
         budget : float or int
             The total budget to be allocated.
         time_granularity : str
-            The granularity of the time periods (e.g., 'daily', 'weekly', 'monthly').
-        num_days : int
-            The number of days over which the budget is to be allocated.
+            The granularity of each time unit counted by ``num_periods`` (e.g., 'daily', 'weekly', 'monthly').
+        num_periods : int
+            The number of time units over which the budget is to be allocated.
         budget_bounds : dict[str, list[Any]], optional
             A dictionary specifying the lower and upper bounds for the budget allocation
             for each channel. If None, no bounds are applied.
@@ -2126,54 +2132,32 @@ def allocate_budget_to_maximize_response(
             quantile=quantile
         )
 
-        scale_budget = budget / self.channel_transformer["scaler"].scale_.max()
-
-        if isinstance(budget_bounds, dict):
-            scale_budget_bounds: dict[str, tuple[float, float]] | None = {
-                k: (
-                    v[0] / self.channel_transformer["scaler"].scale_.max(),
-                    v[1] / self.channel_transformer["scaler"].scale_.max(),
-                )
-                for k, v in budget_bounds.items()
-            }
-        else:
-            scale_budget_bounds = None
-
         allocator = BudgetOptimizer(
             adstock=self.adstock,
             saturation=self.saturation,
             parameters=parameters_mid,
             adstock_first=self.adstock_first,
-            num_days=num_days,
+            num_periods=num_periods,
+            scales=self.channel_transformer["scaler"].scale_,
         )
 
         self.optimal_allocation_dict, _ = allocator.allocate_budget(
-            total_budget=scale_budget,
-            budget_bounds=scale_budget_bounds,
+            total_budget=budget,
+            budget_bounds=budget_bounds,
             custom_constraints=custom_constraints,
         )
 
-        inverse_scaled_channel_spend = self.channel_transformer.inverse_transform(
-            np.array([list(self.optimal_allocation_dict.values())])
-        )
-        original_scale_allocation_dict = dict(
-            zip(
-                self.optimal_allocation_dict.keys(),
-                inverse_scaled_channel_spend[0],
-                strict=False,
-            )
-        )
-
         synth_dataset = self._create_synth_dataset(
             df=self.X,
             date_column=self.date_column,
-            allocation_strategy=original_scale_allocation_dict,
+            allocation_strategy=self.optimal_allocation_dict,
             channels=self.channel_columns,
             controls=self.control_columns,
             target_col=self.output_var,
             time_granularity=time_granularity,
-            time_length=num_days,
+            time_length=num_periods,
             lag=self.adstock.l_max,
+            noise_level=noise_level,
         )
 
         return self.sample_posterior_predictive(

diff --git a/tests/mmm/test_budget_optimizer.py b/tests/mmm/test_budget_optimizer.py
@@ -81,9 +81,10 @@ def test_allocate_budget(
     optimizer = BudgetOptimizer(
         adstock=adstock,
         saturation=saturation,
-        num_days=30,
+        num_periods=30,
         parameters=parameters,
         adstock_first=True,
+        scales=np.array([1, 1]),
     )
 
     # Allocate Budget
@@ -130,9 +131,10 @@ def test_allocate_budget_zero_total(
     optimizer = BudgetOptimizer(
         adstock=adstock,
         saturation=saturation,
-        num_days=30,
+        num_periods=30,
         parameters=parameters,
         adstock_first=True,
+        scales=np.array([1, 1]),
     )
     match = "Using default equality constraint"
     with pytest.warns(UserWarning, match=match):
@@ -168,9 +170,10 @@ def test_allocate_budget_custom_minimize_args(minimize_mock) -> None:
     optimizer = optimizer = BudgetOptimizer(
         adstock=adstock,
         saturation=saturation,
-        num_days=30,
+        num_periods=30,
         parameters=parameters,
         adstock_first=True,
+        scales=np.array([1, 1]),
     )
     match = "Using default equality constraint"
     with pytest.warns(UserWarning, match=match):
@@ -226,9 +229,10 @@ def test_allocate_budget_infeasible_constraints(
     optimizer = optimizer = BudgetOptimizer(
         adstock=adstock,
         saturation=saturation,
-        num_days=30,
+        num_periods=30,
         parameters=parameters,
         adstock_first=True,
+        scales=np.array([1, 1]),
     )
 
     with pytest.raises(MinimizeException, match="Optimization failed"):

diff --git a/tests/mmm/test_delayed_saturated_mmm.py b/tests/mmm/test_delayed_saturated_mmm.py
@@ -466,6 +466,43 @@ def test_channel_contributions_forward_pass_recovers_contribution(
             y=mmm_fitted.y.max(),
         )
 
+    def test_allocate_budget_to_maximize_response(self, mmm_fitted: MMM) -> None:
+        budget = 2.0
+        num_periods = 8
+        time_granularity = "weekly"
+        budget_bounds = {"channel_1": [0.5, 1.2], "channel_2": [0.5, 1.5]}
+        noise_level = 0.1
+
+        # Call the method
+        inference_data = mmm_fitted.allocate_budget_to_maximize_response(
+            budget=budget,
+            time_granularity=time_granularity,
+            num_periods=num_periods,
+            budget_bounds=budget_bounds,
+            noise_level=noise_level,
+        )
+
+        inference_periods = len(inference_data.coords["date"])
+
+        # a) Total budget consistency check
+        allocated_budget = sum(mmm_fitted.optimal_allocation_dict.values())
+        assert np.isclose(
+            allocated_budget, budget, rtol=1e-5
+        ), f"Total allocated budget {allocated_budget} does not match expected budget {budget}"
+
+        # b) Budget boundaries check
+        for channel, bounds in budget_bounds.items():
+            allocation = mmm_fitted.optimal_allocation_dict[channel]
+            lower_bound, upper_bound = bounds
+            assert (
+                lower_bound <= allocation <= upper_bound
+            ), f"Channel {channel} allocation {allocation} is out of bounds ({lower_bound}, {upper_bound})"
+
+        # c) num_periods consistency check
+        assert (
+            inference_periods == num_periods
+        ), f"Number of periods in the data {inference_periods} does not match the expected {num_periods}"
+
     @pytest.mark.parametrize(
         argnames="original_scale",
         argvalues=[False, True],