From 5df23ac8e69a5d45962f96bff93c028c985bc8fa Mon Sep 17 00:00:00 2001
From: JOSEBA DALMAU <joseba.dalmau@irt-saintexupery.com>
Date: Tue, 17 Oct 2023 11:06:25 +0200
Subject: [PATCH 1/5] Add correction to calibrate

---
 .gitignore                    | 1 +
 deel/puncc/api/calibration.py | 7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/.gitignore b/.gitignore
index 55115a4..298dce5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@
 # Virtual environment
 puncc-dev-env/
 puncc-user-env/
+.venv/
 
 # Local demo files
 **demo/
diff --git a/deel/puncc/api/calibration.py b/deel/puncc/api/calibration.py
index ebd6f5c..083ac0f 100644
--- a/deel/puncc/api/calibration.py
+++ b/deel/puncc/api/calibration.py
@@ -36,6 +36,7 @@
 
 from deel.puncc.api.utils import alpha_calib_check
 from deel.puncc.api.utils import quantile
+from deel.puncc.api.correction import bonferroni
 
 logger = logging.getLogger(__name__)
 
@@ -161,6 +162,7 @@ def calibrate(
         alpha: float,
         y_pred: Iterable,
         weights: Optional[Iterable] = None,
+        correction: Optional[Callable] = bonferroni,
     ) -> Tuple[np.ndarray]:
         """Compute calibrated prediction sets for new examples w.r.t a
         significance level :math:`\\alpha`.
@@ -170,6 +172,9 @@ def calibrate(
         :param Iterable weights: weights to be associated to the nonconformity
                                  scores. Defaults to None when all the scores
                                  are equiprobable.
+        :param Callable correction: correction for multiple hypothesis testing
+                                    in the case of multivariate regression.
+                                    Defaults to Bonferroni correction.
 
         :returns: prediction set.
                   In case of regression, returns (y_lower, y_upper).
@@ -184,6 +189,8 @@ def calibrate(
 
         if self._residuals is None:
             raise RuntimeError("Run `fit` method before calling `calibrate`.")
+        
+        alpha = correction(alpha)
 
         # Check consistency of alpha w.r.t the size of calibration data
         if weights is None:

From 70fcad11ab88df28a5206c7ef5af3dc4e5387786 Mon Sep 17 00:00:00 2001
From: JOSEBA DALMAU <joseba.dalmau@irt-saintexupery.com>
Date: Thu, 19 Oct 2023 11:18:16 +0200
Subject: [PATCH 2/5] Add ca multivariate cbration unit tests

---
 tests/api/test_calibration.py | 49 +++++++++++++++++++++++++++++++++++
 tests/conftest.py             | 13 ++++++++++
 2 files changed, 62 insertions(+)

diff --git a/tests/api/test_calibration.py b/tests/api/test_calibration.py
index e7bfcd2..d53bb9d 100644
--- a/tests/api/test_calibration.py
+++ b/tests/api/test_calibration.py
@@ -152,3 +152,52 @@ def test_anomaly_detection_calibrator(
     assert anomalies.shape == (117, 2)
     assert not_anomalies is not None
     assert not_anomalies.shape == (33, 2)
+
+
+@pytest.mark.parametrize(
+    "alpha, random_state",
+    [[0.1, 42], [0.3, 42], [0.5, 42], [0.7, 42], [0.9, 42],
+    [np.array(0.1, 0.3), 42], [np.array(0.5, 0.7), 42]]
+)
+def test_multivariate_regression_calibrator(
+    rand_multivariate_reg_data, alpha, random_state
+    ):
+    # Generate data
+    (y_pred_calib, y_calib, y_pred_test, y_test) = rand_multivariate_reg_data
+
+    # Nonconformity score function that takes as argument
+    # the predicted values y_pred = model(X) and the true labels y_true. In
+    # this example, we reimplement the mean absolute deviation that is
+    # already defined in `deel.puncc.api.nonconformity_scores.mad`
+    def nonconformity_function(y_pred, y_true):
+        return np.abs(y_pred - y_true)
+
+    # Prediction sets are computed based on point predictions and
+    # the quantiles of nonconformity scores. The function below returns a
+    # fixed size rectangle around the point predictions.
+    def prediction_set_function(y_pred, scores_quantile):
+        y_lo = y_pred - scores_quantile
+        y_hi = y_pred + scores_quantile
+        return y_lo, y_hi
+
+    # The calibrator is instantiated by passing the two functions defined
+    # above to the constructor.
+    calibrator = BaseCalibrator(
+        nonconf_score_func=nonconformity_function,
+        pred_set_func=prediction_set_function,
+    )
+
+    # The nonconformity scores are computed by calling the `fit` method
+    # on the calibration dataset.
+    calibrator.fit(y_pred=y_pred_calib, y_true=y_calib)
+
+    # The lower and upper bounds of the prediction interval are then returned
+    # by the call to calibrate on the new data w.r.t a risk level alpha.
+    y_pred_lower, y_pred_upper = calibrator.calibrate(
+        y_pred=y_pred_test, alpha=alpha
+    )
+
+    assert y_pred_lower is not None
+    assert y_pred_upper is not None
+    assert not (True in np.isnan(y_pred_lower))
+    assert not (True in np.isnan(y_pred_upper))
diff --git a/tests/conftest.py b/tests/conftest.py
index 689c2fc..780452e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -80,6 +80,19 @@ def rand_reg_data():
     return y_pred_calib, y_calib, y_pred_test, y_test
 
 
+@pytest.fixture
+def rand_multivariate_reg_data():
+    X_pred_calib = 10 * np.random.randn(100, 8)
+    X_pred_test = 10 * np.random.randn(100, 8)
+    X_test = 10 * np.random.randn(100, 8)
+    y_pred_calib = 4 * np.random.randn(100, 8) + 1
+    y_calib = 2 * np.random.randn(100, 2) + 1
+    y_pred_test = 4 * np.random.randn(100, 2) + 2
+    y_test = 2 * np.random.randn(100, 2) + 1
+
+    return y_pred_calib, y_calib, y_pred_test, y_test
+
+
 @pytest.fixture
 def rand_class_data():
     X_pred_calib = 10 * np.random.randn(100, 4)

From 70941d80f5c874f851b449b81794c790b0e8f001 Mon Sep 17 00:00:00 2001
From: JOSEBA DALMAU <joseba.dalmau@irt-saintexupery.com>
Date: Fri, 22 Dec 2023 10:34:22 +0100
Subject: [PATCH 3/5] add compute_quantile method to basecalibrator

---
 deel/puncc/api/calibration.py | 76 ++++++++++++++++++++++++-----------
 1 file changed, 53 insertions(+), 23 deletions(-)

diff --git a/deel/puncc/api/calibration.py b/deel/puncc/api/calibration.py
index 2a46aaa..86ae1f7 100644
--- a/deel/puncc/api/calibration.py
+++ b/deel/puncc/api/calibration.py
@@ -36,7 +36,7 @@
 
 from deel.puncc.api.utils import alpha_calib_check
 from deel.puncc.api.utils import quantile
-from deel.puncc.api.correction import bonferroni
+from deel.puncc.api.corrections import bonferroni
 
 logger = logging.getLogger(__name__)
 
@@ -186,28 +186,7 @@ def calibrate(
             calibration set.
 
         """
-
-        if self._residuals is None:
-            raise RuntimeError("Run `fit` method before calling `calibrate`.")
-        
-        alpha = correction(alpha)
-
-        # Check consistency of alpha w.r.t the size of calibration data
-        if weights is None:
-            alpha_calib_check(alpha=alpha, n=self._len_calib)
-
-        # Compute weighted quantiles
-        ## Lemma 1 of Tibshirani's paper (https://arxiv.org/pdf/1904.06019.pdf)
-        ## The coverage guarantee holds with 1) the inflated
-        ## (1-\alpha)(1+1/n)-th quantile or 2) when adding an infinite term to
-        ## the sequence and computing the $(1-\alpha)$-th empirical quantile.
-        infty_array = np.array([np.inf])
-        lemma_residuals = np.concatenate((self._residuals, infty_array))
-        residuals_Q = quantile(
-            lemma_residuals,
-            1 - alpha,
-            w=weights,
-        )
+        residuals_Q = self.compute_quantile(alpha=alpha, weights=weights, correction=correction)
 
         return self.pred_set_func(y_pred, scores_quantile=residuals_Q)
 
@@ -236,6 +215,57 @@ def get_nonconformity_scores(self) -> np.ndarray:
         :rtype: np.ndarray
         """
         return self._residuals
+    
+    def compute_quantile(
+        self,
+        *,
+        alpha: float,
+        weights: Optional[Iterable] = None,
+        correction: Optional[Callable] = bonferroni,
+    ) -> np.ndarray:
+        """Compute quantile of scores w.r.t a
+        significance level :math:`\\alpha`.
+
+        :param float alpha: significance level (max miscoverage target).
+        :param Iterable weights: weights to be associated to the nonconformity
+                                 scores. Defaults to None when all the scores
+                                 are equiprobable.
+        :param Callable correction: correction for multiple hypothesis testing
+                                    in the case of multivariate regression.
+                                    Defaults to Bonferroni correction.
+
+        :returns: quantile
+        :rtype: ndarray
+
+        :raises RuntimeError: :meth:`compute_quantile` called before :meth:`fit`.
+        :raise ValueError: failed check on :data:`alpha` w.r.t size of the
+            calibration set.
+
+        """
+
+        if self._residuals is None:
+            raise RuntimeError("Run `fit` method before calling `calibrate`.")
+        
+        alpha = correction(alpha)
+
+        # Check consistency of alpha w.r.t the size of calibration data
+        if weights is None:
+            alpha_calib_check(alpha=alpha, n=self._len_calib)
+
+        # Compute weighted quantiles
+        ## Lemma 1 of Tibshirani's paper (https://arxiv.org/pdf/1904.06019.pdf)
+        ## The coverage guarantee holds with 1) the inflated
+        ## (1-\alpha)(1+1/n)-th quantile or 2) when adding an infinite term to
+        ## the sequence and computing the $(1-\alpha)$-th empirical quantile.
+        infty_array = np.array([np.inf])
+        lemma_residuals = np.concatenate((self._residuals, infty_array))
+        residuals_Q = quantile(
+            lemma_residuals,
+            1 - alpha,
+            w=weights,
+        )
+
+        return residuals_Q
 
     @staticmethod
     def barber_weights(weights: np.ndarray) -> np.ndarray:

From 3491cc526638c78f056be0c720c8c57ba75e2007 Mon Sep 17 00:00:00 2001
From: JOSEBA DALMAU <joseba.dalmau@irt-saintexupery.com>
Date: Tue, 2 Jan 2024 12:13:11 +0100
Subject: [PATCH 4/5] develop multivariate calibrator

---
 deel/puncc/api/calibration.py | 18 ++++++++++++++++--
 deel/puncc/api/corrections.py |  4 ++--
 tests/api/test_calibration.py |  2 +-
 tests/conftest.py             |  2 +-
 4 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/deel/puncc/api/calibration.py b/deel/puncc/api/calibration.py
index 86ae1f7..359df77 100644
--- a/deel/puncc/api/calibration.py
+++ b/deel/puncc/api/calibration.py
@@ -135,6 +135,7 @@ def __init__(
         self._len_calib = 0
         self._residuals = None
         self._norm_weights = None
+        self._feature_axis = None
 
     def fit(
         self,
@@ -154,6 +155,8 @@ def fit(
         logger.debug(f"Shape of y_true: {y_true.shape}")
         self._residuals = self.nonconf_score_func(y_pred, y_true)
         self._len_calib = len(self._residuals)
+        if y_pred.ndim > 1:
+            self._feature_axis = -1
         logger.debug("Nonconformity scores computed !")
 
     def calibrate(
@@ -257,12 +260,16 @@ def compute_quantile(
         ## The coverage guarantee holds with 1) the inflated
         ## (1-\alpha)(1+1/n)-th quantile or 2) when adding an infinite term to
         ## the sequence and computing the $(1-\alpha)$-th empirical quantile.
-        infty_array = np.array([np.inf])
-        lemma_residuals = np.concatenate((self._residuals, infty_array))
+        if self._residuals.ndim > 1:
+            infty_array = np.full((1,self._residuals.shape[-1]), np.inf)
+        else:
+            infty_array = np.array([np.inf])
+        lemma_residuals = np.concatenate((self._residuals, infty_array), axis=0)
         residuals_Q = quantile(
             lemma_residuals,
             1 - alpha,
             w=weights,
+            feature_axis=self._feature_axis
         )
 
         return residuals_Q
@@ -455,6 +462,7 @@ class CvPlusCalibrator:
     def __init__(self, kfold_calibrators: dict):
         self.kfold_calibrators_dict = kfold_calibrators
         self._len_calib = None
+        self._feature_axis = None
 
         # Sanity checks:
         #   - The collection of calibrators is not None
@@ -520,6 +528,10 @@ def calibrate(
             if y_pred is None:
                 raise RuntimeError("No prediction obtained with cv+.")
 
+            # Check for multivariate predictions
+            if y_pred.ndim > 1:
+                self._feature_axis = -1
+
             # nonconformity scores
             kth_calibrator = self.kfold_calibrators_dict[k]
             nconf_scores = kth_calibrator.get_nonconformity_scores()
@@ -573,11 +585,13 @@ def calibrate(
             (1 - alpha) * (1 + 1 / self._len_calib),
             w=weights,
             axis=1,
+            feature_axis=self._feature_axis
         )
         y_hi = quantile(
             concat_y_hi,
             (1 - alpha) * (1 + 1 / self._len_calib),
             w=weights,
             axis=1,
+            feature_axis=self._feature_axis
         )
         return y_lo, y_hi
diff --git a/deel/puncc/api/corrections.py b/deel/puncc/api/corrections.py
index d1c5aec..3a4dd5b 100644
--- a/deel/puncc/api/corrections.py
+++ b/deel/puncc/api/corrections.py
@@ -27,7 +27,7 @@
 import numpy as np
 
 
-def bonferroni(alpha: float, nvars: int) -> float:
+def bonferroni(alpha: float, nvars: int =1) -> float:
     """Bonferroni correction for multiple comparisons.
     
     :param float alpha: nominal coverage level.
@@ -38,7 +38,7 @@ def bonferroni(alpha: float, nvars: int) -> float:
     :rtype: float.
     """
     # Sanity checks
-    if alpha <= 0 or alpha >= 1:
+    if np.any(alpha <= 0) or np.any(alpha >= 1):
         raise ValueError("alpha must be in (0,1)")
 
     if nvars <= 0:
diff --git a/tests/api/test_calibration.py b/tests/api/test_calibration.py
index d53bb9d..aa14ae5 100644
--- a/tests/api/test_calibration.py
+++ b/tests/api/test_calibration.py
@@ -157,7 +157,7 @@ def test_anomaly_detection_calibrator(
 @pytest.mark.parametrize(
     "alpha, random_state",
     [[0.1, 42], [0.3, 42], [0.5, 42], [0.7, 42], [0.9, 42],
-    [np.array(0.1, 0.3), 42], [np.array(0.5, 0.7), 42]]
+    [np.array([0.1, 0.3]), 42], [np.array([0.5, 0.7]), 42]]
 )
 def test_multivariate_regression_calibrator(
     rand_multivariate_reg_data, alpha, random_state
diff --git a/tests/conftest.py b/tests/conftest.py
index 780452e..8abfdcf 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -85,7 +85,7 @@ def rand_multivariate_reg_data():
     X_pred_calib = 10 * np.random.randn(100, 8)
     X_pred_test = 10 * np.random.randn(100, 8)
     X_test = 10 * np.random.randn(100, 8)
-    y_pred_calib = 4 * np.random.randn(100, 8) + 1
+    y_pred_calib = 4 * np.random.randn(100, 2) + 1
     y_calib = 2 * np.random.randn(100, 2) + 1
     y_pred_test = 4 * np.random.randn(100, 2) + 2
     y_test = 2 * np.random.randn(100, 2) + 1

From 01da86610c34b1b5561acf98bc930b710b6f6a3f Mon Sep 17 00:00:00 2001
From: JOSEBA DALMAU <joseba.dalmau@irt-saintexupery.com>
Date: Tue, 2 Jan 2024 17:04:55 +0100
Subject: [PATCH 5/5] erase trailing whitespaces

---
 deel/puncc/api/calibration.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/deel/puncc/api/calibration.py b/deel/puncc/api/calibration.py
index 359df77..818bf13 100644
--- a/deel/puncc/api/calibration.py
+++ b/deel/puncc/api/calibration.py
@@ -218,7 +218,7 @@ def get_nonconformity_scores(self) -> np.ndarray:
         :rtype: np.ndarray
         """
         return self._residuals
-    
+
     def compute_quantile(
         self,
         *,
@@ -248,7 +248,7 @@ def compute_quantile(
 
         if self._residuals is None:
             raise RuntimeError("Run `fit` method before calling `calibrate`.")
-        
+
         alpha = correction(alpha)
 
         # Check consistency of alpha w.r.t the size of calibration data