Skip to content

Commit

Permalink
Solving optimizer issues & typos (#933)
Browse files Browse the repository at this point in the history
* Correcting typo num_days by horizon

* Correcting typo num_days by horizon and scaler

* Running notebooks

* Update UML Diagrams

* Rename horizon by periods

* Adding test requested to check budget outputs

* Running notebooks

* Update UML Diagrams

* Small notebook missing change.

* Correction in tests

* Change on name

* running notebook modifying function

* Update UML Diagrams
  • Loading branch information
cetagostini authored and twiecki committed Sep 10, 2024
1 parent b528016 commit f5755cd
Show file tree
Hide file tree
Showing 8 changed files with 1,606 additions and 1,464 deletions.
466 changes: 244 additions & 222 deletions docs/source/notebooks/mmm/mmm_budget_allocation_example.ipynb

Large diffs are not rendered by default.

2,484 changes: 1,286 additions & 1,198 deletions docs/source/notebooks/mmm/mmm_example.ipynb

Large diffs are not rendered by default.

Binary file modified docs/source/notebooks/mmm/model.nc
Binary file not shown.
Binary file modified docs/source/uml/classes_mmm.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
17 changes: 12 additions & 5 deletions pymc_marketing/mmm/budget_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ class BudgetOptimizer(BaseModel):
The adstock class.
saturation : SaturationTransformation
The saturation class.
num_days : int
The number of days.
num_periods : int
The number of time units.
parameters : dict
A dictionary of parameters for each channel.
adstock_first : bool, optional
Expand All @@ -65,10 +65,17 @@ class BudgetOptimizer(BaseModel):
saturation: SaturationTransformation = Field(
..., description="The saturation transformation class."
)
num_days: int = Field(..., gt=0, description="The number of days.")
num_periods: int = Field(
...,
gt=0,
description="The number of time units at time granularity which the budget is to be allocated.",
)
parameters: dict[str, dict[str, dict[str, float]]] = Field(
..., description="A dictionary of parameters for each channel."
)
scales: np.ndarray = Field(
..., description="The scale parameter for each channel variable"
)
adstock_first: bool = Field(
True,
description="Whether to apply adstock transformation first or saturation transformation first.",
Expand Down Expand Up @@ -97,7 +104,7 @@ def objective(self, budgets: list[float]) -> float:
else (self.saturation, self.adstock)
)
for idx, (_channel, params) in enumerate(self.parameters.items()):
budget = budgets[idx]
budget = budgets[idx] / self.scales[idx]
first_params = (
params["adstock_params"]
if self.adstock_first
Expand All @@ -108,7 +115,7 @@ def objective(self, budgets: list[float]) -> float:
if self.adstock_first
else params["adstock_params"]
)
spend = np.full(self.num_days, budget)
spend = np.full(self.num_periods, budget)
spend_extended = np.concatenate([spend, np.zeros(self.adstock.l_max)])
transformed_spend = second_transform.function(
x=first_transform.function(x=spend_extended, **first_params),
Expand Down
54 changes: 19 additions & 35 deletions pymc_marketing/mmm/delayed_saturated_mmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1990,6 +1990,7 @@ def _create_synth_dataset(
time_granularity: str,
time_length: int,
lag: int,
noise_level: float = 0.01,
) -> pd.DataFrame:
"""
Create a synthetic dataset based on the given allocation strategy (Budget) and time granularity.
Expand All @@ -2014,6 +2015,8 @@ def _create_synth_dataset(
The length of the synthetic dataset in terms of the time granularity.
lag : int
The lag value (not used in this function).
noise_level : float
Standard deviation of the Gaussian noise added to each channel's allocation, expressed as a fraction of that allocation (by default 0.01, i.e. 1%).
Returns
-------
Expand Down Expand Up @@ -2063,7 +2066,9 @@ def _create_synth_dataset(
self.date_column: pd.to_datetime(new_date),
**{
channel: allocation_strategy.get(channel, 0)
+ np.random.normal(0, 0.1 * allocation_strategy.get(channel, 0))
+ np.random.normal(
0, noise_level * allocation_strategy.get(channel, 0)
)
for channel in channels
},
**{control: 0 for control in _controls},
Expand All @@ -2078,10 +2083,11 @@ def allocate_budget_to_maximize_response(
self,
budget: float | int,
time_granularity: str,
num_days: int,
budget_bounds: dict[str, list[Any]] | None = None,
num_periods: int,
budget_bounds: dict[str, tuple[float, float]] | None = None,
custom_constraints: dict[str, float] | None = None,
quantile: float = 0.5,
noise_level: float = 0.01,
) -> az.InferenceData:
"""
Allocate the given budget to maximize the response over a specified time period.
Expand All @@ -2101,9 +2107,9 @@ def allocate_budget_to_maximize_response(
budget : float or int
The total budget to be allocated.
time_granularity : str
The granularity of the time periods (e.g., 'daily', 'weekly', 'monthly').
num_days : int
The number of days over which the budget is to be allocated.
The granularity of each time unit counted by ``num_periods`` (e.g., 'daily', 'weekly', 'monthly').
num_periods : int
The number of time units over which the budget is to be allocated.
budget_bounds : dict[str, tuple[float, float]], optional
A dictionary specifying the lower and upper bounds for the budget allocation
for each channel. If None, no bounds are applied.
Expand All @@ -2126,54 +2132,32 @@ def allocate_budget_to_maximize_response(
quantile=quantile
)

scale_budget = budget / self.channel_transformer["scaler"].scale_.max()

if isinstance(budget_bounds, dict):
scale_budget_bounds: dict[str, tuple[float, float]] | None = {
k: (
v[0] / self.channel_transformer["scaler"].scale_.max(),
v[1] / self.channel_transformer["scaler"].scale_.max(),
)
for k, v in budget_bounds.items()
}
else:
scale_budget_bounds = None

allocator = BudgetOptimizer(
adstock=self.adstock,
saturation=self.saturation,
parameters=parameters_mid,
adstock_first=self.adstock_first,
num_days=num_days,
num_periods=num_periods,
scales=self.channel_transformer["scaler"].scale_,
)

self.optimal_allocation_dict, _ = allocator.allocate_budget(
total_budget=scale_budget,
budget_bounds=scale_budget_bounds,
total_budget=budget,
budget_bounds=budget_bounds,
custom_constraints=custom_constraints,
)

inverse_scaled_channel_spend = self.channel_transformer.inverse_transform(
np.array([list(self.optimal_allocation_dict.values())])
)
original_scale_allocation_dict = dict(
zip(
self.optimal_allocation_dict.keys(),
inverse_scaled_channel_spend[0],
strict=False,
)
)

synth_dataset = self._create_synth_dataset(
df=self.X,
date_column=self.date_column,
allocation_strategy=original_scale_allocation_dict,
allocation_strategy=self.optimal_allocation_dict,
channels=self.channel_columns,
controls=self.control_columns,
target_col=self.output_var,
time_granularity=time_granularity,
time_length=num_days,
time_length=num_periods,
lag=self.adstock.l_max,
noise_level=noise_level,
)

return self.sample_posterior_predictive(
Expand Down
12 changes: 8 additions & 4 deletions tests/mmm/test_budget_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,10 @@ def test_allocate_budget(
optimizer = BudgetOptimizer(
adstock=adstock,
saturation=saturation,
num_days=30,
num_periods=30,
parameters=parameters,
adstock_first=True,
scales=np.array([1, 1]),
)

# Allocate Budget
Expand Down Expand Up @@ -130,9 +131,10 @@ def test_allocate_budget_zero_total(
optimizer = BudgetOptimizer(
adstock=adstock,
saturation=saturation,
num_days=30,
num_periods=30,
parameters=parameters,
adstock_first=True,
scales=np.array([1, 1]),
)
match = "Using default equality constraint"
with pytest.warns(UserWarning, match=match):
Expand Down Expand Up @@ -168,9 +170,10 @@ def test_allocate_budget_custom_minimize_args(minimize_mock) -> None:
optimizer = optimizer = BudgetOptimizer(
adstock=adstock,
saturation=saturation,
num_days=30,
num_periods=30,
parameters=parameters,
adstock_first=True,
scales=np.array([1, 1]),
)
match = "Using default equality constraint"
with pytest.warns(UserWarning, match=match):
Expand Down Expand Up @@ -226,9 +229,10 @@ def test_allocate_budget_infeasible_constraints(
optimizer = optimizer = BudgetOptimizer(
adstock=adstock,
saturation=saturation,
num_days=30,
num_periods=30,
parameters=parameters,
adstock_first=True,
scales=np.array([1, 1]),
)

with pytest.raises(MinimizeException, match="Optimization failed"):
Expand Down
37 changes: 37 additions & 0 deletions tests/mmm/test_delayed_saturated_mmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,43 @@ def test_channel_contributions_forward_pass_recovers_contribution(
y=mmm_fitted.y.max(),
)

def test_allocate_budget_to_maximize_response(self, mmm_fitted: MMM) -> None:
    """Validate the outputs of ``allocate_budget_to_maximize_response``.

    The fitted model is asked to allocate a total budget of 2.0 over 8 weekly
    periods with per-channel bounds. Three properties are then asserted:

    a) the per-channel allocations sum to the requested budget,
    b) every bounded channel's allocation lies within its bounds,
    c) the returned inference data has one "date" coordinate per period.
    """
    budget = 2.0
    num_periods = 8
    budget_bounds = {"channel_1": [0.5, 1.2], "channel_2": [0.5, 1.5]}

    # Run the allocation; the method also stores the result on the model
    # as ``optimal_allocation_dict``, which the checks below read.
    inference_data = mmm_fitted.allocate_budget_to_maximize_response(
        budget=budget,
        time_granularity="weekly",
        num_periods=num_periods,
        budget_bounds=budget_bounds,
        noise_level=0.1,
    )
    inference_periods = len(inference_data.coords["date"])
    optimal_allocation = mmm_fitted.optimal_allocation_dict

    # a) Total budget consistency check
    allocated_budget = sum(optimal_allocation.values())
    budget_matches = np.isclose(allocated_budget, budget, rtol=1e-5)
    assert (
        budget_matches
    ), f"Total allocated budget {allocated_budget} does not match expected budget {budget}"

    # b) Budget boundaries check
    for channel, (lower_bound, upper_bound) in budget_bounds.items():
        allocation = optimal_allocation[channel]
        within_bounds = lower_bound <= allocation <= upper_bound
        assert (
            within_bounds
        ), f"Channel {channel} allocation {allocation} is out of bounds ({lower_bound}, {upper_bound})"

    # c) num_periods consistency check
    periods_match = inference_periods == num_periods
    assert (
        periods_match
    ), f"Number of periods in the data {inference_periods} does not match the expected {num_periods}"

@pytest.mark.parametrize(
argnames="original_scale",
argvalues=[False, True],
Expand Down

0 comments on commit f5755cd

Please sign in to comment.