Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Flake8-BugBear and Bandit (Security!) #637

Merged
merged 13 commits into from
Apr 18, 2024
2 changes: 1 addition & 1 deletion docs/source/notebooks/clv/clv_quickstart.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -968,7 +968,7 @@
"source": [
"ids = [841, 1981, 157, 1516]\n",
"ax = az.plot_posterior(num_purchases.sel(customer_id=ids), grid=(2, 2))\n",
"for axi, id in zip(ax.ravel(), ids):\n",
"for axi, id in zip(ax.ravel(), ids, strict=False):\n",
" axi.set_title(f\"Customer: {id}\", size=20)\n",
"plt.suptitle(\"Expected number purchases in the next period\", fontsize=28, y=1.05);"
]
Expand Down
2 changes: 1 addition & 1 deletion docs/source/notebooks/general/other_nuts_samplers.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@
" for j, (idata, label) in enumerate(\n",
" zip(\n",
" [idata_blackjax, idata_nutpie, idata_numpyro],\n",
" [\"blackjax\", \"nutpie\", \"numpyro\"],\n",
" [\"blackjax\", \"nutpie\", \"numpyro\"], strict=False,\n",
" )\n",
" ):\n",
" az.plot_posterior(\n",
Expand Down
8 changes: 4 additions & 4 deletions pymc_marketing/clv/models/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def _add_fit_data_group(self, data: pd.DataFrame) -> None:
category=UserWarning,
message="The group fit_data is not defined in the InferenceData scheme",
)
assert self.idata is not None
assert self.idata is not None # noqa: S101
self.idata.add_groups(fit_data=data.to_xarray())

def fit( # type: ignore
Expand Down Expand Up @@ -206,8 +206,8 @@ def thin_fit_result(self, keep_every: int):
)

"""
self.fit_result # Raise Error if fit didn't happen yet
assert self.idata is not None
self.fit_result # noqa: B018 (Raise Error if fit didn't happen yet)
assert self.idata is not None # noqa: S101
new_idata = self.idata.isel(draw=slice(None, None, keep_every)).copy()
return type(self)._build_with_idata(new_idata)

Expand Down Expand Up @@ -237,7 +237,7 @@ def fit_result(self, res: az.InferenceData) -> None:
if self.idata is None:
self.idata = res
elif "posterior" in self.idata:
warnings.warn("Overriding pre-existing fit_result")
warnings.warn("Overriding pre-existing fit_result", stacklevel=1)
self.idata.posterior = res
else:
self.idata.posterior = res
Expand Down
17 changes: 9 additions & 8 deletions pymc_marketing/clv/models/gamma_gamma.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,11 @@ def expected_customer_spend(
mean_transaction_value, frequency = to_xarray(
customer_id, mean_transaction_value, frequency
)
assert self.idata is not None, "Model must be fitted first"
p = self.idata.posterior["p"]
q = self.idata.posterior["q"]
v = self.idata.posterior["v"]
posterior = self.fit_result

p = posterior["p"]
q = posterior["q"]
v = posterior["v"]

individual_weight = p * frequency / (p * frequency + q - 1)
population_mean = v * p / (q - 1)
Expand Down Expand Up @@ -89,10 +90,10 @@ def distribution_new_customer_spend(
def expected_new_customer_spend(self) -> xarray.DataArray:
"""Expected transaction value for a new customer"""

assert self.idata is not None, "Model must be fitted first"
p_mean = self.idata.posterior["p"]
q_mean = self.idata.posterior["q"]
v_mean = self.idata.posterior["v"]
posterior = self.fit_result
p_mean = posterior["p"]
q_mean = posterior["q"]
v_mean = posterior["v"]

# Closed form solution to the posterior of nu
# Eq 3 from [1], p.3
Expand Down
2 changes: 1 addition & 1 deletion pymc_marketing/clv/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def _find_first_transactions(


def clv_summary(*args, **kwargs):
warnings.warn("clv_summary was renamed to rfm_summary", UserWarning)
warnings.warn("clv_summary was renamed to rfm_summary", UserWarning, stacklevel=1)
return rfm_summary(*args, **kwargs)


Expand Down
14 changes: 10 additions & 4 deletions pymc_marketing/mmm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@
for arg, var_contribution in zip(
["control_columns", "yearly_seasonality"],
["control_contributions", "fourier_contributions"],
strict=True,
):
if getattr(self, arg, None):
contributions = self._format_model_contributions(
Expand All @@ -413,6 +414,7 @@
"control_contribution",
"fourier_contribution",
],
strict=False,
)
):
if self.X is not None:
Expand Down Expand Up @@ -688,7 +690,7 @@

# Plot all scenarios
for i, (scenario, upper_bound, lower_bound) in enumerate(
zip(scenarios, upper_bounds, lower_bounds)
zip(scenarios, upper_bounds, lower_bounds, strict=False)
):
color = f"C{i}"
offset = i * bar_width - 0.4 + bar_width / 2
Expand Down Expand Up @@ -914,7 +916,9 @@
"The 'parameters' argument (keyword-only) must be provided and non-empty."
)

warnings.warn("This budget allocator method is experimental", UserWarning)
warnings.warn(

Check warning on line 919 in pymc_marketing/mmm/base.py

View check run for this annotation

Codecov / codecov/patch

pymc_marketing/mmm/base.py#L919

Added line #L919 was not covered by tests
"This budget allocator method is experimental", UserWarning, stacklevel=1
)

return budget_allocator(
method=method,
Expand Down Expand Up @@ -947,7 +951,9 @@
parameters for each channel based on the method used.
"""
warnings.warn(
"The curve optimization parameters method is experimental", UserWarning
"The curve optimization parameters method is experimental",
UserWarning,
stacklevel=1,
)

channel_contributions = self.compute_channel_contribution_original_scale().mean(
Expand Down Expand Up @@ -1044,7 +1050,7 @@
axes_channels = (
zip(repeat(axes), channels_to_plot)
if same_axes
else zip(np.ravel(axes), channels_to_plot)
else zip(np.ravel(axes), channels_to_plot, strict=False)
)

for i, (ax, channel) in enumerate(axes_channels):
Expand Down
6 changes: 4 additions & 2 deletions pymc_marketing/mmm/budget_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def objective_distribution(

sum_contributions = 0.0

for channel, budget in zip(channels, x):
for channel, budget in zip(channels, x, strict=False):
if method == "michaelis-menten":
L, k = parameters[channel]
sum_contributions += michaelis_menten(budget, L, k)
Expand Down Expand Up @@ -182,7 +182,9 @@ def optimize_budget_distribution(
constraints=constraints,
)

return {channel: budget for channel, budget in zip(channels, result.x)}
return {
channel: budget for channel, budget in zip(channels, result.x, strict=False)
}


def budget_allocator(
Expand Down
2 changes: 1 addition & 1 deletion pymc_marketing/mmm/delayed_saturated_mmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,7 @@
try:
new_channel_data = X[self.channel_columns].to_numpy()
except KeyError as e:
raise RuntimeError("New data must contain channel_data!", e)
raise RuntimeError("New data must contain channel_data!") from e

Check warning on line 655 in pymc_marketing/mmm/delayed_saturated_mmm.py

View check run for this annotation

Codecov / codecov/patch

pymc_marketing/mmm/delayed_saturated_mmm.py#L655

Added line #L655 was not covered by tests

def identity(x):
return x
Expand Down
2 changes: 1 addition & 1 deletion pymc_marketing/mmm/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
if l_max is None:
try:
l_max = w.shape[-1].eval()
except Exception:
except Exception: # noqa: S110

Check warning on line 95 in pymc_marketing/mmm/transformers.py

View check run for this annotation

Codecov / codecov/patch

pymc_marketing/mmm/transformers.py#L95

Added line #L95 was not covered by tests
pass
# Get the broadcast shapes of x and w but ignoring their last dimension.
# The last dimension of x is the "time" axis, which doesn't get broadcast
Expand Down
3 changes: 2 additions & 1 deletion pymc_marketing/model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,8 @@

X_df = pd.DataFrame(X, columns=X.columns)
combined_data = pd.concat([X_df, y_df], axis=1)
assert all(combined_data.columns), "All columns must have non-empty names"
if not all(combined_data.columns):
raise ValueError("All columns must have non-empty names")

Check warning on line 490 in pymc_marketing/model_builder.py

View check run for this annotation

Codecov / codecov/patch

pymc_marketing/model_builder.py#L490

Added line #L490 was not covered by tests
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
Expand Down
11 changes: 10 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,21 @@ repository = "https://github.com/pymc-labs/pymc-marketing"
#changelog = ""

[tool.ruff.lint]
select = ["E", "F", "I", "W", "UP", "RUF"]
select = ["B", "E", "F", "I", "RUF", "S", "UP", "W"]
ignore = [
"B904", # raise-without-from-inside-except
"RUF001", # String contains ambiguous character (such as Greek letters)
"RUF002", # Docstring contains ambiguous character (such as Greek letters)
"RUF012", # Mutable class attributes should be annotated with `typing.ClassVar`
]
[tool.ruff.lint.per-file-ignores]
"docs/source/notebooks/*" = [
"B018", # Checks for "useless" expressions. Not useful for notebooks.
]
"tests/*" = [
"B018", # Checks for "useless" expressions. This is useful for tests.
"S101", # Use of assert
]

[tool.ruff.lint.pycodestyle]
max-line-length = 120
Expand Down
4 changes: 2 additions & 2 deletions tests/clv/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@ def test_to_xarray():
np.testing.assert_array_equal(new_x.coords["customer_id"], customer_id)
np.testing.assert_array_equal(x, new_x.values)

for old, new in zip((x, y, z), to_xarray(customer_id, x, y, z)):
for old, new in zip((x, y, z), to_xarray(customer_id, x, y, z), strict=False):
assert isinstance(new, xarray.DataArray)
assert new.dims == ("customer_id",)
np.testing.assert_array_equal(new.coords["customer_id"], customer_id)
np.testing.assert_array_equal(old, new.values)

new_y = to_xarray(customer_id, y, dim="test_dim")
new_y.dims == ("test_dim",)
assert new_y.dims == ("test_dim",)
np.testing.assert_array_equal(new_y.coords["test_dim"], customer_id)


Expand Down
2 changes: 1 addition & 1 deletion tests/mmm/test_lift_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def test_lift_tests_missing(df_lift_tests) -> None:
with pytest.raises(MissingLiftTestError) as err:
lift_test_indices(df_lift_tests, model)

err.value.missing_values.tolist() == [0]
assert err.value.missing_values.tolist() == [0]


@pytest.fixture
Expand Down
37 changes: 20 additions & 17 deletions tests/model_builder/test_model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,26 +25,24 @@

from pymc_marketing.model_builder import ModelBuilder

rng = np.random.default_rng(42)


@pytest.fixture(scope="module")
def toy_X():
x = np.linspace(start=0, stop=1, num=100)
X = pd.DataFrame({"input": x})
return X
return pd.DataFrame({"input": x})


@pytest.fixture(scope="module")
def toy_y(toy_X):
rng = np.random.default_rng(42)
y = 5 * toy_X["input"] + 3
y = y + np.random.normal(0, 1, size=len(toy_X))
y = y + rng.normal(0, 1, size=len(toy_X))
juanitorduz marked this conversation as resolved.
Show resolved Hide resolved
y = pd.Series(y, name="output")
return y


@pytest.fixture(scope="module")
def fitted_model_instance(toy_X, toy_y):
def fitted_model_instance(toy_X):
sampler_config = {
"draws": 100,
"tune": 100,
Expand All @@ -71,19 +69,18 @@ def fitted_model_instance(toy_X, toy_y):


@pytest.fixture(scope="module")
def not_fitted_model_instance(toy_X, toy_y):
def not_fitted_model_instance():
sampler_config = {"draws": 100, "tune": 100, "chains": 2, "target_accept": 0.95}
model_config = {
"a": {"loc": 0, "scale": 10, "dims": ("numbers",)},
"b": {"loc": 0, "scale": 10},
"obs_error": 2,
}
model = ModelBuilderTest(
return ModelBuilderTest(
model_config=model_config,
sampler_config=sampler_config,
test_parameter="test_paramter",
)
return model


class ModelBuilderTest(ModelBuilder):
Expand Down Expand Up @@ -177,12 +174,13 @@ def test_save_input_params(fitted_model_instance):


def test_save_load(fitted_model_instance):
rng = np.random.default_rng(42)
temp = tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", delete=False)
fitted_model_instance.save(temp.name)
test_builder2 = ModelBuilderTest.load(temp.name)
assert fitted_model_instance.idata.groups() == test_builder2.idata.groups()
assert fitted_model_instance.id == test_builder2.id
x_pred = np.random.uniform(low=0, high=1, size=100)
x_pred = rng.uniform(low=0, high=1, size=100)
juanitorduz marked this conversation as resolved.
Show resolved Hide resolved
prediction_data = pd.DataFrame({"input": x_pred})
pred1 = fitted_model_instance.predict(prediction_data["input"])
pred2 = test_builder2.predict(prediction_data["input"])
Expand Down Expand Up @@ -213,21 +211,23 @@ def test_empty_sampler_config_fit(toy_X, toy_y):


def test_fit(fitted_model_instance):
rng = np.random.default_rng(42)
assert fitted_model_instance.idata is not None
assert "posterior" in fitted_model_instance.idata.groups()
assert fitted_model_instance.idata.posterior.dims["draw"] == 100

prediction_data = pd.DataFrame(
{"input": np.random.uniform(low=0, high=1, size=100)}
)
prediction_data = pd.DataFrame({"input": rng.uniform(low=0, high=1, size=100)})
fitted_model_instance.predict(prediction_data["input"])
post_pred = fitted_model_instance.sample_posterior_predictive(
prediction_data["input"], extend_idata=True, combined=True
)
post_pred[fitted_model_instance.output_var].shape[0] == prediction_data.input.shape
assert (
post_pred[fitted_model_instance.output_var].shape[0]
== prediction_data.input.shape[0]
)


def test_fit_no_y(toy_X):
def test_fit_no_t(toy_X):
model_builder = ModelBuilderTest()
model_builder.idata = model_builder.fit(X=toy_X, chains=1, draws=100, tune=100)
assert model_builder.model is not None
Expand All @@ -240,7 +240,8 @@ def test_fit_no_y(toy_X):
reason="Permissions for temp files not granted on windows CI.",
)
def test_predict(fitted_model_instance):
x_pred = np.random.uniform(low=0, high=1, size=100)
rng = np.random.default_rng(42)
x_pred = rng.uniform(low=0, high=1, size=100)
prediction_data = pd.DataFrame({"input": x_pred})
pred = fitted_model_instance.predict(prediction_data["input"])
# Perform elementwise comparison using numpy
Expand All @@ -250,8 +251,9 @@ def test_predict(fitted_model_instance):

@pytest.mark.parametrize("combined", [True, False])
def test_sample_posterior_predictive(fitted_model_instance, combined):
rng = np.random.default_rng(42)
n_pred = 100
x_pred = np.random.uniform(low=0, high=1, size=n_pred)
x_pred = rng.uniform(low=0, high=1, size=n_pred)
prediction_data = pd.DataFrame({"input": x_pred})
pred = fitted_model_instance.sample_posterior_predictive(
prediction_data["input"], combined=combined, extend_idata=True
Expand Down Expand Up @@ -294,6 +296,7 @@ def test_id():
def test_sample_xxx_predictive_keeps_second(
fitted_model_instance, toy_X, name: str
) -> None:
rng = np.random.default_rng(42)
method_name = f"sample_{name}"
method = getattr(fitted_model_instance, method_name)

Expand Down
Loading