SciTools · fnattino · Jul 25, 2024 · Jul 25, 2024 · Aug 27, 2024 · Sep 11, 2024
diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst
@@ -71,6 +71,10 @@ This document explains the changes made to Iris for this release
    the concatenation axis. This issue can be avoided by disabling the
    problematic check. (:pull:`5926`)
 
+#. `@fnattino`_ enabled lazy cube interpolation using the linear and
+   nearest-neighbour interpolators (:class:`iris.analysis.Linear` and
+   :class:`iris.analysis.Nearest`). (:pull:`6084`)
+
 🔥 Deprecations
 ===============
 
@@ -101,6 +105,7 @@ This document explains the changes made to Iris for this release
     Whatsnew author names (@github name) in alphabetical order. Note that,
     core dev names are automatically included by the common_links.inc:
 
+.. _@fnattino: https://github.com/fnattino
 .. _@jrackham-mo: https://github.com/jrackham-mo
 
 

diff --git a/lib/iris/analysis/_interpolation.py b/lib/iris/analysis/_interpolation.py
@@ -12,6 +12,7 @@
 from numpy.lib.stride_tricks import as_strided
 import numpy.ma as ma
 
+from iris._lazy_data import map_complete_blocks
 from iris.coords import AuxCoord, DimCoord
 import iris.util
 
@@ -163,6 +164,15 @@ def snapshot_grid(cube):
     return x.copy(), y.copy()
 
 
+def _interpolated_dtype(dtype, method):
+    """Return the base dtype the underlying interpolator needs for ``method``."""
+    if method != "nearest":
+        # Any other method is promoted to at least ``_DEFAULT_DTYPE``.
+        return np.result_type(_DEFAULT_DTYPE, dtype)
+    # The "nearest" method keeps the incoming dtype unchanged.
+    return dtype
+
+
 class RectilinearInterpolator:
     """Provide support for performing nearest-neighbour or linear interpolation.
 
@@ -200,13 +210,8 @@ def __init__(self, src_cube, coords, method, extrapolation_mode):
               set to NaN.
 
         """
-        # Trigger any deferred loading of the source cube's data and snapshot
-        # its state to ensure that the interpolator is impervious to external
-        # changes to the original source cube. The data is loaded to prevent
-        # the snapshot having lazy data, avoiding the potential for the
-        # same data to be loaded again and again.
-        if src_cube.has_lazy_data():
-            src_cube.data
+        # Snapshot the cube state to ensure that the interpolator is impervious
+        # to external changes to the original source cube.
         self._src_cube = src_cube.copy()
         # Coordinates defining the dimensions to be interpolated.
         self._src_coords = [self._src_cube.coord(coord) for coord in coords]
@@ -277,17 +282,27 @@ def _account_for_inverted(self, data):
             data = data[tuple(dim_slices)]
         return data
 
-    def _interpolate(self, data, interp_points):
+    @staticmethod
+    def _interpolate(
+        data,
+        src_points,
+        interp_points,
+        interp_shape,
+        method="linear",
+        extrapolation_mode="nanmask",
+    ):
         """Interpolate a data array over N dimensions.
 
-        Create and cache the underlying interpolator instance before invoking
-        it to perform interpolation over the data at the given coordinate point
-        values.
+        Create the interpolator instance before invoking it to perform
+        interpolation over the data at the given coordinate point values.
 
         Parameters
         ----------
         data : ndarray
             A data array, to be interpolated in its first 'N' dimensions.
+        src_points :
+            The point values defining the dimensions to be interpolated.
+            (len(src_points) should be N).
         interp_points : ndarray
             An array of interpolation coordinate values.
             Its shape is (..., N) where N is the number of interpolation
@@ -296,44 +311,53 @@ def _interpolate(self, data, interp_points):
             coordinate, which is mapped to the i'th data dimension.
             The other (leading) dimensions index over the different required
             sample points.
+        interp_shape :
+            The shape of the interpolated array in its first 'N' dimensions
+            (len(interp_shape) should be N).
+        method : str
+            Interpolation method (see :class:`iris.analysis._interpolation.RectilinearInterpolator`).
+        extrapolation_mode : str
+            Extrapolation mode (see :class:`iris.analysis._interpolation.RectilinearInterpolator`).
 
         Returns
         -------
         :class:`np.ndarray`.
-            Its shape is "points_shape + extra_shape",
+            Its shape is "interp_shape + extra_shape",
             where "extra_shape" is the remaining non-interpolated dimensions of
-            the data array (i.e. 'data.shape[N:]'), and "points_shape" is the
-            leading dimensions of interp_points,
-            (i.e. 'interp_points.shape[:-1]').
-
+            the data array (i.e. 'data.shape[N:]').
         """
         from iris.analysis._scipy_interpolate import _RegularGridInterpolator
 
-        dtype = self._interpolated_dtype(data.dtype)
+        dtype = _interpolated_dtype(data.dtype, method)
         if data.dtype != dtype:
             # Perform dtype promotion.
             data = data.astype(dtype)
 
-        mode = EXTRAPOLATION_MODES[self._mode]
-        if self._interpolator is None:
-            # Cache the interpolator instance.
-            # NB. The constructor of the _RegularGridInterpolator class does
-            # some unnecessary checks on the fill_value parameter,
-            # so we set it afterwards instead. Sneaky. ;-)
-            self._interpolator = _RegularGridInterpolator(
-                self._src_points,
-                data,
-                method=self.method,
-                bounds_error=mode.bounds_error,
-                fill_value=None,
-            )
-        else:
-            self._interpolator.values = data
+        # Determine the shape of the interpolated result.
+        ndims_interp = len(interp_shape)
+        extra_shape = data.shape[ndims_interp:]
+        final_shape = [*interp_shape, *extra_shape]
+
+        mode = EXTRAPOLATION_MODES[extrapolation_mode]
+        _data = np.ma.getdata(data)
+        # NB. The constructor of the _RegularGridInterpolator class does
+        # some unnecessary checks on the fill_value parameter,
+        # so we set it afterwards instead. Sneaky. ;-)
+        interpolator = _RegularGridInterpolator(
+            src_points,
+            _data,
+            method=method,
+            bounds_error=mode.bounds_error,
+            fill_value=None,
+        )
+        interpolator.fill_value = mode.fill_value
+        result = interpolator(interp_points)
 
-        # We may be re-using a cached interpolator, so ensure the fill
-        # value is set appropriately for extrapolating data values.
-        self._interpolator.fill_value = mode.fill_value
-        result = self._interpolator(interp_points)
+        # The interpolated result now has shape "points_shape + extra_shape"
+        # where "points_shape" is the leading dimensions of "interp_points"
+        # (i.e. 'interp_points.shape[:-1]'). We reshape it to match the shape
+        # of the interpolated dimensions.
+        result = result.reshape(final_shape)
 
         if result.dtype != data.dtype:
             # Cast the data dtype to be as expected. Note that, the dtype
@@ -346,13 +370,11 @@ def _interpolate(self, data, interp_points):
             # `data` is not a masked array.
             src_mask = np.ma.getmaskarray(data)
             # Switch the extrapolation to work with mask values.
-            self._interpolator.fill_value = mode.mask_fill_value
-            self._interpolator.values = src_mask
-            mask_fraction = self._interpolator(interp_points)
+            interpolator.fill_value = mode.mask_fill_value
+            interpolator.values = src_mask
+            mask_fraction = interpolator(interp_points)
             new_mask = mask_fraction > 0
-            if ma.isMaskedArray(data) or np.any(new_mask):
-                result = np.ma.MaskedArray(result, new_mask)
-
+            result = np.ma.MaskedArray(result, new_mask)
         return result
 
     def _resample_coord(self, sample_points, coord, coord_dims):
@@ -530,7 +552,7 @@ def _points(self, sample_points, data, data_dims=None):
         _, src_order = zip(*sorted(dmap.items(), key=operator.itemgetter(0)))
 
         # Prepare the sample points for interpolation and calculate the
-        # shape of the interpolated result.
+        # shape of the interpolated dimensions.
         interp_points = []
         interp_shape = []
         for index, points in enumerate(sample_points):
@@ -539,10 +561,6 @@ def _points(self, sample_points, data, data_dims=None):
             interp_points.append(points)
             interp_shape.append(points.size)
 
-        interp_shape.extend(
-            length for dim, length in enumerate(data.shape) if dim not in di
-        )
-
         # Convert the interpolation points into a cross-product array
         # with shape (n_cross_points, n_dims)
         interp_points = np.asarray([pts for pts in product(*interp_points)])
@@ -554,9 +572,20 @@ def _points(self, sample_points, data, data_dims=None):
             # Transpose data in preparation for interpolation.
             data = np.transpose(data, interp_order)
 
-        # Interpolate and reshape the data ...
-        result = self._interpolate(data, interp_points)
-        result = result.reshape(interp_shape)
+        # Interpolate the data, ensuring the interpolated dimensions
+        # are not chunked.
+        dims_not_chunked = [dmap[d] for d in di]
+        result = map_complete_blocks(
+            data,
+            self._interpolate,
+            dims=dims_not_chunked,
+            out_sizes=interp_shape,
+            src_points=self._src_points,
+            interp_points=interp_points,
+            interp_shape=interp_shape,
+            method=self._method,
+            extrapolation_mode=self._mode,
+        )
 
         if src_order != dims:
             # Restore the interpolated result to the original
@@ -592,7 +621,7 @@ def __call__(self, sample_points, collapse_scalar=True):
 
         sample_points = _canonical_sample_points(self._src_coords, sample_points)
 
-        data = self._src_cube.data
+        data = self._src_cube.core_data()
         # Interpolate the cube payload.
         interpolated_data = self._points(sample_points, data)
 

diff --git a/lib/iris/tests/unit/analysis/interpolation/test_RectilinearInterpolator.py b/lib/iris/tests/unit/analysis/interpolation/test_RectilinearInterpolator.py
@@ -499,24 +499,37 @@ def test_orthogonal_cube_squash(self):
         self.assertEqual(result_cube, non_collapsed_cube[0, ...])
 
 
+class Test___call___real_data(ThreeDimCube):
+    def test_src_cube_data_loaded(self):
+        # A source cube holding real (non-lazy) data when the
+        # interpolator is built must yield an interpolated result that
+        # holds real data too.
+        self.assertFalse(self.cube.has_lazy_data())
+
+        # Interpolate along latitude and confirm the result is not lazy.
+        interpolator = RectilinearInterpolator(
+            self.cube, ["latitude"], LINEAR, EXTRAPOLATE
+        )
+        result = interpolator([[1.5]])
+        self.assertFalse(result.has_lazy_data())
+
+
 class Test___call___lazy_data(ThreeDimCube):
     def test_src_cube_data_loaded(self):
-        # RectilinearInterpolator operates using a snapshot of the source cube.
         # If the source cube has lazy data when the interpolator is
-        # instantiated we want to make sure the source cube's data is
-        # loaded as a consequence of interpolation to avoid the risk
-        # of loading it again and again.
+        # instantiated, then the interpolated result should also have
+        # lazy data.
 
         # Modify self.cube to have lazy data.
         self.cube.data = as_lazy_data(self.data)
         self.assertTrue(self.cube.has_lazy_data())
 
-        # Perform interpolation and check the data has been loaded.
+        # Perform interpolation and check the data is lazy.
         interpolator = RectilinearInterpolator(
             self.cube, ["latitude"], LINEAR, EXTRAPOLATE
         )
-        interpolator([[1.5]])
-        self.assertFalse(self.cube.has_lazy_data())
+        res = interpolator([[1.5]])
+        self.assertTrue(res.has_lazy_data())
 
 
 class Test___call___time(tests.IrisTest):