Internal refactor (#804)

* Start migration * Update prior scaler * More updates. The backend needs further updates * First implementation working * Fix aliases * More fixes thanks to tests * Fix more tests. Detected problems with PyMC * Adapt code to multivariate-like families * Adapt code to multivariate-like families * Update dependencies * Re-run several example notebooks * add function to create xr.Dataset for bayeux based inferences * rerun more examples * Rerun more examples * bump pymc version to latest release * Add function to add observed_data group when using bayeux * Rename arguments and values to indicate we want the parameters of the response distribution * fix broken link * Add dim name to families with categorical response levels
bambinos · Jul 4, 2024 · 4cc3103 · 4cc3103
1 parent 6180e73
commit 4cc3103
Show file tree

Hide file tree

Showing 59 changed files with 15,978 additions and 14,941 deletions.
diff --git a/.pylintrc b/.pylintrc
@@ -384,8 +384,7 @@ missing-member-max-choices=1
 
 # List of note tags to take in consideration, separated by a comma.
 notes=FIXME,
-      XXX,
-      TODO
+      XXX
 
 
 [SPELLING]

diff --git a/bambi/backend/model_components.py b/bambi/backend/model_components.py
@@ -6,7 +6,6 @@
 from bambi.backend.utils import get_distribution_from_prior
 from bambi.families.multivariate import MultivariateFamily
 from bambi.families.univariate import Categorical, Cumulative, StoppingRatio
-from bambi.utils import get_aliased_name
 
 
 ORDINAL_FAMILIES = (Cumulative, StoppingRatio)
@@ -18,18 +17,14 @@ def __init__(self, component):
         self.output = 0
 
     def build(self, pymc_backend, bmb_model):
-        response_aliased_name = get_aliased_name(bmb_model.response_component.response_term)
-        if self.component.alias:
-            label = self.component.alias
-        else:
-            label = f"{response_aliased_name}_{self.component.name}"
+        label = self.component.alias if self.component.alias else self.component.name
 
         # NOTE: This could be handled in a different manner in the future, only applies to
         # thresholds and assumes we always do it when using ordinal families.
         extra_args = {}
         if isinstance(bmb_model.family, ORDINAL_FAMILIES):
             threshold_dim = label + "_dim"
-            threshold_values = np.arange(len(bmb_model.response_component.response_term.levels) - 1)
+            threshold_values = np.arange(len(bmb_model.response_component.term.levels) - 1)
             extra_args["dims"] = threshold_dim
             pymc_backend.model.add_coords({threshold_dim: threshold_values})
 
@@ -58,7 +53,7 @@ def build(self, pymc_backend, bmb_model):
             self.build_intercept(bmb_model)
             self.build_offsets()
             self.build_common_terms(pymc_backend, bmb_model)
-            self.build_hsgp_terms(pymc_backend, bmb_model)
+            self.build_hsgp_terms(pymc_backend)
             self.build_group_specific_terms(pymc_backend, bmb_model)
 
     def build_intercept(self, bmb_model):
@@ -78,7 +73,7 @@ def build_common_terms(self, pymc_backend, bmb_model):
         """Add common (fixed) terms to the PyMC model.
 
         We have linear predictors of the form 'X @ b + Z @ u'.
-        This creates the 'b' parameter vector in PyMC, computes `X @ b`, and adds it to ``self.mu``.
+        This creates the 'b' parameter vector in PyMC, computes `X @ b`, and adds it to `self.mu`.
 
         Parameters
         ----------
@@ -114,25 +109,25 @@ def build_common_terms(self, pymc_backend, bmb_model):
             # Add term to linear predictor
             self.output += pt.dot(data, coefs)
 
-    def build_hsgp_terms(self, pymc_backend, bmb_model):
+    def build_hsgp_terms(self, pymc_backend):
         """Add HSGP (Hilbert-Space Gaussian Process approximation) terms to the PyMC model.
 
         The linear predictor 'X @ b + Z @ u' can be augmented with non-parametric HSGP terms
-        'f(x)'. This creates the 'f(x)' and adds it ``self.output``.
+        'f(x)'. This creates the 'f(x)' and adds it to `self.output`.
         """
         for term in self.component.hsgp_terms.values():
             hsgp_term = HSGPTerm(term)
             for name, values in hsgp_term.coords.items():
                 if name not in pymc_backend.model.coords:
                     pymc_backend.model.add_coords({name: values})
-            self.output += hsgp_term.build(bmb_model)
+            self.output += hsgp_term.build()
 
     def build_group_specific_terms(self, pymc_backend, bmb_model):
         """Add group-specific (random or varying) terms to the PyMC model.
 
         We have linear predictors of the form 'X @ b + Z @ u'.
         This creates the 'u' parameter vector in PyMC, computes `Z @ u`, and adds it to
-        ``self.output``.
+        `self.output`.
         """
         for term in self.component.group_specific_terms.values():
             group_specific_term = GroupSpecificTerm(term, bmb_model.noncentered)
@@ -156,22 +151,23 @@ def build_group_specific_terms(self, pymc_backend, bmb_model):
             else:
                 self.output += coef * predictor
 
-    def build_response(self, pymc_backend, bmb_model):
-        # Extract the response term from the Bambi family
-        response_term = bmb_model.response_component.response_term
-
-        # Create and build the response term
-        response_term = ResponseTerm(response_term, bmb_model.family)
-        response_term.build(pymc_backend, bmb_model)
-
     def add_response_coords(self, pymc_backend, bmb_model):
-        response_term = bmb_model.response_component.response_term
-        response_name = get_aliased_name(response_term)
-        dim_name = f"{response_name}_obs"
+        response_term = bmb_model.response_component.term
+        dim_name = "__obs__"
         dim_value = np.arange(response_term.shape[0])
         pymc_backend.model.add_coords({dim_name: dim_value})
 
 
+class ResponseComponent:
+    def __init__(self, component):
+        self.component = component
+
+    def build(self, pymc_backend, bmb_model):
+        # Create and build the response term
+        response_term = ResponseTerm(self.component.term, bmb_model.family)
+        response_term.build(pymc_backend, bmb_model)
+
+
 # # NOTE: Here for historical reasons, not supposed to work now at least for now
 # def add_lkj(backend, terms, eta=1):
 #     """Add correlated prior for group-specific effects.
@@ -183,15 +179,15 @@ def add_response_coords(self, pymc_backend, bmb_model):
 #     Parameters
 #     ----------
 #     terms: list
-#         A list of terms that share a common grouper (i.e. ``1|Group`` and ``Variable|Group`` in
+#         A list of terms that share a common grouper (i.e. `1|Group` and `Variable|Group` in
 #         formula notation).
 #     eta: num
 #         The value for the eta parameter in the LKJ distribution.
 
 #     Parameters
 #     ----------
 #     mu
-#         The contribution to the linear predictor of the roup-specific terms in ``terms``.
+#         The contribution to the linear predictor of the group-specific terms in `terms`.
 #     """
 
 #     # Parameters