From 793be6a9e67034d7726553af08c86c7cbf7ec32b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1s=20Capretto?=
Date: Thu, 11 Apr 2024 13:54:07 -0300
Subject: [PATCH] Support PyMC 5.13 and fix bayeux related issues (#803)

---
 CHANGELOG.md                 |  2 ++
 bambi/backend/pymc.py        | 40 ++++++++++++++++++++++--------------
 bambi/backend/terms.py       |  6 +++++-
 bambi/families/univariate.py |  2 +-
 pyproject.toml               |  2 +-
 5 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cb8082f6a..8eb58aef4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,8 @@
 * Fix bug in predictions with models using HSGP (#780)
 * Fix `get_model_covariates()` utility function (#801)
+* Upgrade PyMC dependency to >= 5.13 (#803)
+* Use `pm.compute_deterministics()` to compute deterministics when bayeux based samplers are used (#803)
 
 ### Documentation
 
diff --git a/bambi/backend/pymc.py b/bambi/backend/pymc.py
index 82b646ebe..a84d3c571 100644
--- a/bambi/backend/pymc.py
+++ b/bambi/backend/pymc.py
@@ -253,8 +253,13 @@ def _run_mcmc(
         return idata
 
     def _clean_results(self, idata, omit_offsets, include_mean, idata_from):
-        for group in idata.groups():
+        # Before doing anything, make sure we compute deterministics.
+        if idata_from == "bayeux":
+            idata.posterior = pm.compute_deterministics(
+                idata.posterior, model=self.model, merge_dataset=True, progressbar=False
+            )
 
+        for group in idata.groups():
             getattr(idata, group).attrs["modeling_interface"] = "bambi"
             getattr(idata, group).attrs["modeling_interface_version"] = __version__
 
@@ -262,36 +267,41 @@ def _clean_results(self, idata, omit_offsets, include_mean, idata_from):
         offset_vars = [var for var in idata.posterior.data_vars if var.endswith("_offset")]
         idata.posterior = idata.posterior.drop_vars(offset_vars)
 
-        # Drop variables and dimensions associated with LKJ prior
-        vars_to_drop = [var for var in idata.posterior.data_vars if var.startswith("_LKJ")]
-        dims_to_drop = [dim for dim in idata.posterior.dims if dim.startswith("_LKJ")]
+        # NOTE:
+        # This has not had an effect for a while since we haven't been supporting LKJ prior lately.
 
-        idata.posterior = idata.posterior.drop_vars(vars_to_drop)
-        idata.posterior = idata.posterior.drop_dims(dims_to_drop)
+        # Drop variables and dimensions associated with LKJ prior
+        # vars_to_drop = [var for var in idata.posterior.data_vars if var.startswith("_LKJ")]
+        # dims_to_drop = [dim for dim in idata.posterior.dims if dim.startswith("_LKJ")]
+        # idata.posterior = idata.posterior.drop_vars(vars_to_drop)
+        # idata.posterior = idata.posterior.drop_dims(dims_to_drop)
 
         dims_original = list(self.model.coords)
 
         # Identify bayeux idata and rename dims and coordinates to match PyMC model
         if idata_from == "bayeux":
-            pymc_model_dims = [dim for dim in dims_original if "_obs" not in dim]
-            bayeux_dims = [
-                dim for dim in idata.posterior.dims if not dim.startswith(("chain", "draw"))
-            ]
-            cleaned_dims = dict(zip(bayeux_dims, pymc_model_dims))
+            cleaned_dims = {
+                f"{dim}_0": dim
+                for dim in dims_original
+                if not dim.endswith("_obs") and f"{dim}_0" in idata.posterior.dims
+            }
            idata = idata.rename(cleaned_dims)
 
-        # Discard dims that are in the model but unused in the posterior
+        # Don't select dims that are in the model but unused in the posterior
         dims_original = [dim for dim in dims_original if dim in idata.posterior.dims]
 
         # This does not add any new coordinate, it just changes the order so the ones
         # ending in "__factor_dim" are placed after the others.
-        dims_group = [c for c in dims_original if c.endswith("__factor_dim")]
+        dims_group = [dim for dim in dims_original if dim.endswith("__factor_dim")]
 
         # Keep the original order in dims_original
         dims_original_set = set(dims_original) - set(dims_group)
-        dims_original = [c for c in dims_original if c in dims_original_set]
+        dims_original = [dim for dim in dims_original if dim in dims_original_set]
         dims_new = ["chain", "draw"] + dims_original + dims_group
-        idata.posterior = idata.posterior.transpose(*dims_new)
+
+        # Drop unused dimensions before transposing
+        dims_to_drop = [dim for dim in idata.posterior.dims if dim not in dims_new]
+        idata.posterior = idata.posterior.drop_dims(dims_to_drop).transpose(*dims_new)
 
         # Compute the actual intercept in all distributional components that have an intercept
         for pymc_component in self.distributional_components.values():
diff --git a/bambi/backend/terms.py b/bambi/backend/terms.py
index a33b40d64..ed2a2e02c 100644
--- a/bambi/backend/terms.py
+++ b/bambi/backend/terms.py
@@ -110,9 +110,13 @@ def build(self, spec):
             response_dims = list(spec.response_component.response_term.coords)
 
         dims = list(self.coords) + response_dims
+        coef = self.build_distribution(self.term.prior, label, dims=dims, **kwargs)
+
         # Squeeze ensures we don't have a shape of (n, 1) when we mean (n, )
         # This happens with categorical predictors with two levels and intercept.
-        coef = self.build_distribution(self.term.prior, label, dims=dims, **kwargs).squeeze()
+        # See https://github.com/pymc-devs/pymc/issues/7246
+        if len(coef.shape.eval()) == 2 and coef.shape.eval()[-1] == 1:
+            coef = pt.specify_broadcastable(coef, 1).squeeze()
         coef = coef[self.term.group_index]
 
         return coef, predictor
diff --git a/bambi/families/univariate.py b/bambi/families/univariate.py
index fa45c43a2..93b91785e 100644
--- a/bambi/families/univariate.py
+++ b/bambi/families/univariate.py
@@ -403,7 +403,7 @@ def transform_backend_eta(eta, kwargs):
     def transform_backend_kwargs(kwargs):
         # P(Y = k) = F(threshold_k - eta) * \prod_{j=1}^{k-1}{1 - F(threshold_j - eta)}
         p = kwargs.pop("p")
-        n_columns = p.type.shape[-1]
+        n_columns = p.shape.eval()[-1]
         p = pt.concatenate(
             [
                 pt.shape_padright(p[..., 0]),
diff --git a/pyproject.toml b/pyproject.toml
index f1f00dc88..f8b5ac674 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
     "formulae>=0.5.3",
     "graphviz",
     "pandas>=1.0.0",
-    "pymc>=5.12.0,<5.13.0",
+    "pymc>=5.13.0",
 ]
 
 [project.optional-dependencies]