pymc-labs · ColtAllen · Jun 27, 2024 · Jun 14, 2024 · Jun 14, 2024 · Jun 18, 2024
diff --git a/pymc_marketing/clv/models/gamma_gamma.py b/pymc_marketing/clv/models/gamma_gamma.py
@@ -17,32 +17,28 @@
 import pytensor.tensor as pt
 import xarray
 from pymc.util import RandomState
-from pytensor.tensor import TensorVariable
 
-from pymc_marketing.clv.models.basic import CLVModel
+from pymc_marketing.clv.models import CLVModel
 from pymc_marketing.clv.utils import customer_lifetime_value, to_xarray
 
 
 class BaseGammaGammaModel(CLVModel):
     def distribution_customer_spend(
         self,
-        customer_id: np.ndarray | pd.Series,
-        mean_transaction_value: np.ndarray | pd.Series | TensorVariable,
-        frequency: np.ndarray | pd.Series | TensorVariable,
+        data: pd.DataFrame,
         random_seed: RandomState | None = None,
     ) -> xarray.DataArray:
         """Posterior distribution of transaction value per customer"""
 
-        x = frequency
-        z_mean = mean_transaction_value
+        x = data["frequency"]
+        z_mean = data["monetary_value"]
 
-        coords = {"customer_id": np.unique(customer_id)}
+        coords = {"customer_id": np.unique(data["customer_id"])}
         with pm.Model(coords=coords):
             p = pm.HalfFlat("p")
             q = pm.HalfFlat("q")
             v = pm.HalfFlat("v")
 
-            # Closed form solution to the posterior of nu
             # Eq 5 from [1], p.3
             nu = pm.Gamma("nu", p * x + q, v + x * z_mean, dims=("customer_id",))
             pm.Deterministic("mean_spend", p / nu, dims=("customer_id",))
@@ -55,19 +51,25 @@ def distribution_customer_spend(
 
     def expected_customer_spend(
         self,
-        customer_id: np.ndarray | pd.Series,
-        mean_transaction_value: np.ndarray | pd.Series,
-        frequency: np.ndarray | pd.Series,
+        data: pd.DataFrame,
     ) -> xarray.DataArray:
         """Expected transaction value per customer
 
         Eq 5 from [1], p.3
 
         Adapted from: https://github.com/CamDavidsonPilon/lifetimes/blob/aae339c5437ec31717309ba0ec394427e19753c4/lifetimes/fitters/gamma_gamma_fitter.py#L117
+
+        data: pd.DataFrame
+            DataFrame containing the following columns:
+                - customer_id: Customer labels. Must not repeat.
+                - monetary_value: Mean transaction value of repeat purchases for each customer.
+                - frequency: Number of transactions observed for each customer.
         """
 
         mean_transaction_value, frequency = to_xarray(
-            customer_id, mean_transaction_value, frequency
+            data["customer_id"],
+            data["monetary_value"],
+            data["frequency"],
         )
         posterior = self.fit_result
 
@@ -111,45 +113,34 @@ def expected_new_customer_spend(self) -> xarray.DataArray:
         # Closed form solution to the posterior of nu
         # Eq 3 from [1], p.3
         mean_spend = p_mean * v_mean / (q_mean - 1)
-        # We could also provide the variance
+        # TODO: We could also provide the variance
         # var_spend = (p_mean ** 2 * v_mean ** 2) / ((q_mean - 1) ** 2 * (q_mean - 2))
 
         return mean_spend
 
     def expected_customer_lifetime_value(
         self,
         transaction_model: CLVModel,
-        customer_id: np.ndarray | pd.Series,
-        mean_transaction_value: np.ndarray | pd.Series,
-        frequency: np.ndarray | pd.Series,
-        recency: np.ndarray | pd.Series,
-        T: np.ndarray | pd.Series,
-        time: int = 12,
-        discount_rate: float = 0.01,
-        freq: str = "D",
+        data: pd.DataFrame,
+        future_t: int = 12,
+        discount_rate: float = 0.00,
+        time_unit: str = "D",
     ) -> xarray.DataArray:
         """Expected customer lifetime value.
 
         See clv.utils.customer_lifetime_value for details on the meaning of each parameter
         """
 
         # Use the Gamma-Gamma estimates for the monetary_values
-        adjusted_monetary_value = self.expected_customer_spend(
-            customer_id=customer_id,
-            mean_transaction_value=mean_transaction_value,
-            frequency=frequency,
-        )
+        predicted_monetary_value = self.expected_customer_spend(data=data)
 
         return customer_lifetime_value(
             transaction_model=transaction_model,
-            customer_id=customer_id,
-            frequency=frequency,
-            recency=recency,
-            T=T,
-            monetary_value=adjusted_monetary_value,
-            time=time,
+            transaction_data=data,
+            monetary_value=predicted_monetary_value,
+            future_t=future_t,
             discount_rate=discount_rate,
-            freq=freq,
+            time_unit=time_unit,
         )
 
 
@@ -171,7 +162,7 @@ class GammaGammaModel(BaseGammaGammaModel):
     data: pd.DataFrame
         DataFrame containing the following columns:
             - customer_id: Customer labels. Must not repeat.
-            - mean_transaction_value: Mean transaction value of each customer.
+            - monetary_value: Mean transaction value of repeat purchases for each customer.
             - frequency: Number of transactions observed for each customer.
     model_config: dict, optional
         Dictionary of model prior parameters. If not provided, the model will use default priors specified in the
@@ -181,7 +172,7 @@ class GammaGammaModel(BaseGammaGammaModel):
 
     Examples
     --------
-        Gamma-Gamma model condioned on mean transaction value
+        Gamma-Gamma model conditioned on mean transaction value
 
         .. code-block:: python
 
@@ -191,7 +182,7 @@ class GammaGammaModel(BaseGammaGammaModel):
             model = GammaGammaModel(
                 data=pd.DataFrame({
                     "customer_id": [0, 1, 2, 3, ...],
-                    "mean_transaction_value" :[23.5, 19.3, 11.2, 100.5, ...],
+                    "monetary_value": [23.5, 19.3, 11.2, 100.5, ...],
                     "frequency": [6, 8, 2, 1, ...],
                 }),
                 model_config={
@@ -214,7 +205,7 @@ class GammaGammaModel(BaseGammaGammaModel):
             # Predict spend of customers for which we know transaction history, conditioned on data.
             expected_customer_spend = model.expected_customer_spend(
                 customer_id=[0, 1, 2, 3, ...],
-                mean_transaction_value=[23.5, 19.3, 11.2, 100.5, ...],
+                monetary_value=[23.5, 19.3, 11.2, 100.5, ...],
                 frequency=[6, 8, 2, 1, ...],
             )
             print(expected_customer_spend.mean("customer_id"))
@@ -243,7 +234,7 @@ def __init__(
     ):
         self._validate_cols(
             data,
-            required_cols=["customer_id", "mean_transaction_value", "frequency"],
+            required_cols=["customer_id", "monetary_value", "frequency"],
             must_be_unique=["customer_id"],
         )
         super().__init__(
@@ -259,7 +250,7 @@ def default_model_config(self) -> dict:
         }
 
     def build_model(self):
-        z_mean = pt.as_tensor_variable(self.data["mean_transaction_value"])
+        z_mean = pt.as_tensor_variable(self.data["monetary_value"])
         x = pt.as_tensor_variable(self.data["frequency"])
 
         p_prior = self._create_distribution(self.model_config["p_prior"])
@@ -287,6 +278,7 @@ def build_model(self):
             )
 
 
+# TODO: This model requires further evaluation and mention in a notebook.
 class GammaGammaModelIndividual(BaseGammaGammaModel):
     """Gamma-Gamma model
 
@@ -304,9 +296,9 @@ class GammaGammaModelIndividual(BaseGammaGammaModel):
     ----------
     data: pd.DataFrame
         Dataframe containing the following columns:
-            - customer_id: Customer labels. The same value should be used for each observation
+            - customer_id: Customer labels. The same value should be used for each purchase
         coming from the same customer.
-            - individual_transaction_value: Value of individual transactions.
+            - individual_transaction_value: Monetary value of each purchase for each customer.
     model_config: dict, optional
         Dictionary of model prior parameters. If not provided, the model will use default priors specified in the
         `default_model_config` class attribute.
@@ -412,89 +404,3 @@ def build_model(self):
             pm.Gamma(
                 "spend", p, nu[self.data["customer_id"]], observed=z, dims=("obs",)
             )
-
-    def _summarize_mean_data(self, customer_id, individual_transaction_value):
-        df = pd.DataFrame(
-            {
-                "customer_id": customer_id,
-                "individual_transaction_value": individual_transaction_value,
-            }
-        )
-        gdf = df.groupby("customer_id")["individual_transaction_value"].aggregate(
-            ("count", "mean")
-        )
-        customer_id = gdf.index
-        x = gdf["count"]
-        z_mean = gdf["mean"]
-
-        return customer_id, z_mean, x
-
-    def distribution_customer_spend(  # type: ignore [override]
-        self,
-        customer_id: np.ndarray | pd.Series,
-        individual_transaction_value: np.ndarray | pd.Series | TensorVariable,
-        random_seed: RandomState | None = None,
-    ) -> xarray.DataArray:
-        """Return distribution of transaction value per customer"""
-
-        customer_id, z_mean, x = self._summarize_mean_data(
-            customer_id, individual_transaction_value
-        )
-
-        return super().distribution_customer_spend(
-            customer_id=customer_id,
-            mean_transaction_value=z_mean,
-            frequency=x,
-            random_seed=random_seed,
-        )
-
-    def expected_customer_spend(
-        self,
-        customer_id: np.ndarray | pd.Series,
-        individual_transaction_value: np.ndarray | pd.Series | TensorVariable,
-        random_seed: RandomState | None = None,
-    ) -> xarray.DataArray:
-        """Return expected transaction value per customer"""
-
-        customer_id, z_mean, x = self._summarize_mean_data(
-            customer_id, individual_transaction_value
-        )
-
-        return super().expected_customer_spend(
-            customer_id=customer_id,
-            mean_transaction_value=z_mean,
-            frequency=x,
-            random_seed=random_seed,  # type: ignore [call-arg]
-        )
-
-    def expected_customer_lifetime_value(  # type: ignore [override]
-        self,
-        transaction_model: CLVModel,
-        customer_id: np.ndarray | pd.Series,
-        individual_transaction_value: np.ndarray | pd.Series | TensorVariable,
-        recency: np.ndarray | pd.Series,
-        T: np.ndarray | pd.Series,
-        time: int = 12,
-        discount_rate: float = 0.01,
-        freq: str = "D",
-    ) -> xarray.DataArray:
-        """Return expected customer lifetime value.
-
-        See clv.utils.customer_lifetime_value for details on the meaning of each parameter
-        """
-
-        customer_id, z_mean, x = self._summarize_mean_data(
-            customer_id, individual_transaction_value
-        )
-
-        return super().expected_customer_lifetime_value(
-            transaction_model=transaction_model,
-            customer_id=customer_id,
-            mean_transaction_value=z_mean,
-            frequency=x,
-            recency=recency,
-            T=T,
-            time=time,
-            discount_rate=discount_rate,
-            freq=freq,
-        )