Skip to content

Commit

Permalink
Add Flake8-BugBear and Bandit (Security!) (#637)
Browse files Browse the repository at this point in the history
* fix potential bugs

* minor improvements

* remove rule for nb

* fix test

* improve tests syntax

* use stacklevel 2 for warnings

* use stacklevel 1 for warnings as they are used in public methods

* ignore B904

* undo change

* ricardos feedback

* use fit_posterior

* Update pymc_marketing/clv/models/gamma_gamma.py

Co-authored-by: Ricardo Vieira <[email protected]>

* last fix XD

---------

Co-authored-by: Ricardo Vieira <[email protected]>
  • Loading branch information
juanitorduz and ricardoV94 authored Apr 18, 2024
1 parent 582d2ed commit c02d349
Show file tree
Hide file tree
Showing 14 changed files with 67 additions and 45 deletions.
2 changes: 1 addition & 1 deletion docs/source/notebooks/clv/clv_quickstart.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -968,7 +968,7 @@
"source": [
"ids = [841, 1981, 157, 1516]\n",
"ax = az.plot_posterior(num_purchases.sel(customer_id=ids), grid=(2, 2))\n",
"for axi, id in zip(ax.ravel(), ids):\n",
"for axi, id in zip(ax.ravel(), ids, strict=False):\n",
" axi.set_title(f\"Customer: {id}\", size=20)\n",
"plt.suptitle(\"Expected number purchases in the next period\", fontsize=28, y=1.05);"
]
Expand Down
2 changes: 1 addition & 1 deletion docs/source/notebooks/general/other_nuts_samplers.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@
" for j, (idata, label) in enumerate(\n",
" zip(\n",
" [idata_blackjax, idata_nutpie, idata_numpyro],\n",
" [\"blackjax\", \"nutpie\", \"numpyro\"],\n",
" [\"blackjax\", \"nutpie\", \"numpyro\"], strict=False,\n",
" )\n",
" ):\n",
" az.plot_posterior(\n",
Expand Down
8 changes: 4 additions & 4 deletions pymc_marketing/clv/models/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def _add_fit_data_group(self, data: pd.DataFrame) -> None:
category=UserWarning,
message="The group fit_data is not defined in the InferenceData scheme",
)
assert self.idata is not None
assert self.idata is not None # noqa: S101
self.idata.add_groups(fit_data=data.to_xarray())

def fit( # type: ignore
Expand Down Expand Up @@ -206,8 +206,8 @@ def thin_fit_result(self, keep_every: int):
)
"""
self.fit_result # Raise Error if fit didn't happen yet
assert self.idata is not None
self.fit_result # noqa: B018 (Raise Error if fit didn't happen yet)
assert self.idata is not None # noqa: S101
new_idata = self.idata.isel(draw=slice(None, None, keep_every)).copy()
return type(self)._build_with_idata(new_idata)

Expand Down Expand Up @@ -237,7 +237,7 @@ def fit_result(self, res: az.InferenceData) -> None:
if self.idata is None:
self.idata = res
elif "posterior" in self.idata:
warnings.warn("Overriding pre-existing fit_result")
warnings.warn("Overriding pre-existing fit_result", stacklevel=1)
self.idata.posterior = res
else:
self.idata.posterior = res
Expand Down
17 changes: 9 additions & 8 deletions pymc_marketing/clv/models/gamma_gamma.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,11 @@ def expected_customer_spend(
mean_transaction_value, frequency = to_xarray(
customer_id, mean_transaction_value, frequency
)
assert self.idata is not None, "Model must be fitted first"
p = self.idata.posterior["p"]
q = self.idata.posterior["q"]
v = self.idata.posterior["v"]
posterior = self.fit_result

p = posterior["p"]
q = posterior["q"]
v = posterior["v"]

individual_weight = p * frequency / (p * frequency + q - 1)
population_mean = v * p / (q - 1)
Expand Down Expand Up @@ -89,10 +90,10 @@ def distribution_new_customer_spend(
def expected_new_customer_spend(self) -> xarray.DataArray:
"""Expected transaction value for a new customer"""

assert self.idata is not None, "Model must be fitted first"
p_mean = self.idata.posterior["p"]
q_mean = self.idata.posterior["q"]
v_mean = self.idata.posterior["v"]
posterior = self.fit_result
p_mean = posterior["p"]
q_mean = posterior["q"]
v_mean = posterior["v"]

# Closed form solution to the posterior of nu
# Eq 3 from [1], p.3
Expand Down
2 changes: 1 addition & 1 deletion pymc_marketing/clv/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def _find_first_transactions(


def clv_summary(*args, **kwargs):
warnings.warn("clv_summary was renamed to rfm_summary", UserWarning)
warnings.warn("clv_summary was renamed to rfm_summary", UserWarning, stacklevel=1)
return rfm_summary(*args, **kwargs)


Expand Down
14 changes: 10 additions & 4 deletions pymc_marketing/mmm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@ def plot_components_contributions(self, **plt_kwargs: Any) -> plt.Figure:
for arg, var_contribution in zip(
["control_columns", "yearly_seasonality"],
["control_contributions", "fourier_contributions"],
strict=True,
):
if getattr(self, arg, None):
contributions = self._format_model_contributions(
Expand All @@ -413,6 +414,7 @@ def plot_components_contributions(self, **plt_kwargs: Any) -> plt.Figure:
"control_contribution",
"fourier_contribution",
],
strict=False,
)
):
if self.X is not None:
Expand Down Expand Up @@ -688,7 +690,7 @@ def plot_budget_scenearios(

# Plot all scenarios
for i, (scenario, upper_bound, lower_bound) in enumerate(
zip(scenarios, upper_bounds, lower_bounds)
zip(scenarios, upper_bounds, lower_bounds, strict=False)
):
color = f"C{i}"
offset = i * bar_width - 0.4 + bar_width / 2
Expand Down Expand Up @@ -914,7 +916,9 @@ def optimize_channel_budget_for_maximum_contribution(
"The 'parameters' argument (keyword-only) must be provided and non-empty."
)

warnings.warn("This budget allocator method is experimental", UserWarning)
warnings.warn(
"This budget allocator method is experimental", UserWarning, stacklevel=1
)

return budget_allocator(
method=method,
Expand Down Expand Up @@ -947,7 +951,9 @@ def compute_channel_curve_optimization_parameters_original_scale(
parameters for each channel based on the method used.
"""
warnings.warn(
"The curve optimization parameters method is experimental", UserWarning
"The curve optimization parameters method is experimental",
UserWarning,
stacklevel=1,
)

channel_contributions = self.compute_channel_contribution_original_scale().mean(
Expand Down Expand Up @@ -1044,7 +1050,7 @@ def legend_title_func(channel):
axes_channels = (
zip(repeat(axes), channels_to_plot)
if same_axes
else zip(np.ravel(axes), channels_to_plot)
else zip(np.ravel(axes), channels_to_plot, strict=False)
)

for i, (ax, channel) in enumerate(axes_channels):
Expand Down
6 changes: 4 additions & 2 deletions pymc_marketing/mmm/budget_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def objective_distribution(

sum_contributions = 0.0

for channel, budget in zip(channels, x):
for channel, budget in zip(channels, x, strict=False):
if method == "michaelis-menten":
L, k = parameters[channel]
sum_contributions += michaelis_menten(budget, L, k)
Expand Down Expand Up @@ -182,7 +182,9 @@ def optimize_budget_distribution(
constraints=constraints,
)

return {channel: budget for channel, budget in zip(channels, result.x)}
return {
channel: budget for channel, budget in zip(channels, result.x, strict=False)
}


def budget_allocator(
Expand Down
2 changes: 1 addition & 1 deletion pymc_marketing/mmm/delayed_saturated_mmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,7 @@ def _data_setter(
try:
new_channel_data = X[self.channel_columns].to_numpy()
except KeyError as e:
raise RuntimeError("New data must contain channel_data!", e)
raise RuntimeError("New data must contain channel_data!") from e

def identity(x):
return x
Expand Down
2 changes: 1 addition & 1 deletion pymc_marketing/mmm/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def batched_convolution(
if l_max is None:
try:
l_max = w.shape[-1].eval()
except Exception:
except Exception: # noqa: S110
pass
# Get the broadcast shapes of x and w but ignoring their last dimension.
# The last dimension of x is the "time" axis, which doesn't get broadcast
Expand Down
3 changes: 2 additions & 1 deletion pymc_marketing/model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,8 @@ def fit(

X_df = pd.DataFrame(X, columns=X.columns)
combined_data = pd.concat([X_df, y_df], axis=1)
assert all(combined_data.columns), "All columns must have non-empty names"
if not all(combined_data.columns):
raise ValueError("All columns must have non-empty names")
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
Expand Down
11 changes: 10 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,21 @@ repository = "https://github.com/pymc-labs/pymc-marketing"
#changelog = ""

[tool.ruff.lint]
select = ["E", "F", "I", "W", "UP", "RUF"]
select = ["B", "E", "F", "I", "RUF", "S", "UP", "W"]
ignore = [
"B904", # raise-without-from-inside-except
"RUF001", # String contains ambiguous character (such as Greek letters)
"RUF002", # Docstring contains ambiguous character (such as Greek letters)
"RUF012", # Mutable class attributes should be annotated with `typing.ClassVar`
]
[tool.ruff.lint.per-file-ignores]
"docs/source/notebooks/*" = [
"B018", # Checks for "useless" expressions. Not useful for notebooks.
]
"tests/*" = [
"B018", # Checks for "useless" expressions. This is useful for tests.
"S101", # Use of assert
]

[tool.ruff.lint.pycodestyle]
max-line-length = 120
Expand Down
4 changes: 2 additions & 2 deletions tests/clv/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ def test_to_xarray():
np.testing.assert_array_equal(new_x.coords["customer_id"], customer_id)
np.testing.assert_array_equal(x, new_x.values)

for old, new in zip((x, y, z), to_xarray(customer_id, x, y, z)):
for old, new in zip((x, y, z), to_xarray(customer_id, x, y, z), strict=False):
assert isinstance(new, xarray.DataArray)
assert new.dims == ("customer_id",)
np.testing.assert_array_equal(new.coords["customer_id"], customer_id)
np.testing.assert_array_equal(old, new.values)

new_y = to_xarray(customer_id, y, dim="test_dim")
new_y.dims == ("test_dim",)
assert new_y.dims == ("test_dim",)
np.testing.assert_array_equal(new_y.coords["test_dim"], customer_id)


Expand Down
2 changes: 1 addition & 1 deletion tests/mmm/test_lift_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def test_lift_tests_missing(df_lift_tests) -> None:
with pytest.raises(MissingLiftTestError) as err:
lift_test_indices(df_lift_tests, model)

err.value.missing_values.tolist() == [0]
assert err.value.missing_values.tolist() == [0]


@pytest.fixture
Expand Down
37 changes: 20 additions & 17 deletions tests/model_builder/test_model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,26 +25,24 @@

from pymc_marketing.model_builder import ModelBuilder

rng = np.random.default_rng(42)


@pytest.fixture(scope="module")
def toy_X():
x = np.linspace(start=0, stop=1, num=100)
X = pd.DataFrame({"input": x})
return X
return pd.DataFrame({"input": x})


@pytest.fixture(scope="module")
def toy_y(toy_X):
rng = np.random.default_rng(42)
y = 5 * toy_X["input"] + 3
y = y + np.random.normal(0, 1, size=len(toy_X))
y = y + rng.normal(0, 1, size=len(toy_X))
y = pd.Series(y, name="output")
return y


@pytest.fixture(scope="module")
def fitted_model_instance(toy_X, toy_y):
def fitted_model_instance(toy_X):
sampler_config = {
"draws": 100,
"tune": 100,
Expand All @@ -71,19 +69,18 @@ def fitted_model_instance(toy_X, toy_y):


@pytest.fixture(scope="module")
def not_fitted_model_instance(toy_X, toy_y):
def not_fitted_model_instance():
sampler_config = {"draws": 100, "tune": 100, "chains": 2, "target_accept": 0.95}
model_config = {
"a": {"loc": 0, "scale": 10, "dims": ("numbers",)},
"b": {"loc": 0, "scale": 10},
"obs_error": 2,
}
model = ModelBuilderTest(
return ModelBuilderTest(
model_config=model_config,
sampler_config=sampler_config,
test_parameter="test_paramter",
)
return model


class ModelBuilderTest(ModelBuilder):
Expand Down Expand Up @@ -177,12 +174,13 @@ def test_save_input_params(fitted_model_instance):


def test_save_load(fitted_model_instance):
rng = np.random.default_rng(42)
temp = tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", delete=False)
fitted_model_instance.save(temp.name)
test_builder2 = ModelBuilderTest.load(temp.name)
assert fitted_model_instance.idata.groups() == test_builder2.idata.groups()
assert fitted_model_instance.id == test_builder2.id
x_pred = np.random.uniform(low=0, high=1, size=100)
x_pred = rng.uniform(low=0, high=1, size=100)
prediction_data = pd.DataFrame({"input": x_pred})
pred1 = fitted_model_instance.predict(prediction_data["input"])
pred2 = test_builder2.predict(prediction_data["input"])
Expand Down Expand Up @@ -213,21 +211,23 @@ def test_empty_sampler_config_fit(toy_X, toy_y):


def test_fit(fitted_model_instance):
rng = np.random.default_rng(42)
assert fitted_model_instance.idata is not None
assert "posterior" in fitted_model_instance.idata.groups()
assert fitted_model_instance.idata.posterior.dims["draw"] == 100

prediction_data = pd.DataFrame(
{"input": np.random.uniform(low=0, high=1, size=100)}
)
prediction_data = pd.DataFrame({"input": rng.uniform(low=0, high=1, size=100)})
fitted_model_instance.predict(prediction_data["input"])
post_pred = fitted_model_instance.sample_posterior_predictive(
prediction_data["input"], extend_idata=True, combined=True
)
post_pred[fitted_model_instance.output_var].shape[0] == prediction_data.input.shape
assert (
post_pred[fitted_model_instance.output_var].shape[0]
== prediction_data.input.shape[0]
)


def test_fit_no_y(toy_X):
def test_fit_no_t(toy_X):
model_builder = ModelBuilderTest()
model_builder.idata = model_builder.fit(X=toy_X, chains=1, draws=100, tune=100)
assert model_builder.model is not None
Expand All @@ -240,7 +240,8 @@ def test_fit_no_y(toy_X):
reason="Permissions for temp files not granted on windows CI.",
)
def test_predict(fitted_model_instance):
x_pred = np.random.uniform(low=0, high=1, size=100)
rng = np.random.default_rng(42)
x_pred = rng.uniform(low=0, high=1, size=100)
prediction_data = pd.DataFrame({"input": x_pred})
pred = fitted_model_instance.predict(prediction_data["input"])
# Perform elementwise comparison using numpy
Expand All @@ -250,8 +251,9 @@ def test_predict(fitted_model_instance):

@pytest.mark.parametrize("combined", [True, False])
def test_sample_posterior_predictive(fitted_model_instance, combined):
rng = np.random.default_rng(42)
n_pred = 100
x_pred = np.random.uniform(low=0, high=1, size=n_pred)
x_pred = rng.uniform(low=0, high=1, size=n_pred)
prediction_data = pd.DataFrame({"input": x_pred})
pred = fitted_model_instance.sample_posterior_predictive(
prediction_data["input"], combined=combined, extend_idata=True
Expand Down Expand Up @@ -294,6 +296,7 @@ def test_id():
def test_sample_xxx_predictive_keeps_second(
fitted_model_instance, toy_X, name: str
) -> None:
rng = np.random.default_rng(42)
method_name = f"sample_{name}"
method = getattr(fitted_model_instance, method_name)

Expand Down

0 comments on commit c02d349

Please sign in to comment.