From 5196633602f17a35f9ffc06e68f208187503f7e6 Mon Sep 17 00:00:00 2001
From: Ian Thomas <ianthomas23@gmail.com>
Date: Tue, 1 Aug 2023 10:00:29 +0100
Subject: [PATCH] CPU shift_and_insert function (#1260)

---
 datashader/reductions.py | 46 ++++++++--------------------------
 datashader/utils.py      | 53 +++++++++++++++++++++++++---------------
 2 files changed, 43 insertions(+), 56 deletions(-)

diff --git a/datashader/reductions.py b/datashader/reductions.py
index 30047e577..e8bb59134 100644
--- a/datashader/reductions.py
+++ b/datashader/reductions.py
@@ -32,6 +32,7 @@
     Expr, ngjit, nansum_missing, nanmax_in_place, nansum_in_place, row_min_in_place,
     nanmax_n_in_place_4d, nanmax_n_in_place_3d, nanmin_n_in_place_4d, nanmin_n_in_place_3d,
     row_max_n_in_place_4d, row_max_n_in_place_3d, row_min_n_in_place_4d, row_min_n_in_place_3d,
+    shift_and_insert,
 )
 
 
@@ -1473,11 +1474,7 @@ def _antialias_stage_2(self, self_intersect, array_module) -> tuple[AntialiasSta
     def _append(x, y, agg, field):
         if not isnull(field):
             # Always inserts at front of agg's third dimension.
-            # Bump previous values along to make room for new value.
-            n = agg.shape[2]
-            for j in range(n-1, 0, -1):
-                agg[y, x, j] = agg[y, x, j-1]
-            agg[y, x, 0] = field
+            shift_and_insert(agg[y, x], field, 0)
             return 0
         return -1
 
@@ -1502,10 +1499,7 @@ def _append(x, y, agg, field):
             n = agg.shape[2]
             for i in range(n):
                 if isnull(agg[y, x, i]) or field > agg[y, x, i]:
-                    # Bump previous values along to make room for new value.
-                    for j in range(n-1, i, -1):
-                        agg[y, x, j] = agg[y, x, j-1]
-                    agg[y, x, i] = field
+                    shift_and_insert(agg[y, x], field, i)
                     return i
         return -1
 
@@ -1574,10 +1568,7 @@ def _append(x, y, agg, field):
             n = agg.shape[2]
             for i in range(n):
                 if isnull(agg[y, x, i]) or field < agg[y, x, i]:
-                    # Bump previous values along to make room for new value.
-                    for j in range(n-1, i, -1):
-                        agg[y, x, j] = agg[y, x, j-1]
-                    agg[y, x, i] = field
+                    shift_and_insert(agg[y, x], field, i)
                     return i
         return -1
 
@@ -1735,17 +1726,13 @@ def _antialias_stage_2(self, self_intersect, array_module) -> tuple[AntialiasSta
     # CPU append functions
     # All where._append* functions have an extra argument which is the update index.
     # For 3D aggs like max_n, this is the index of insertion in the final dimension,
-    # and the previous values from this index upwards are bumped along to make room
+    # and the previous values from this index upwards are shifted along to make room
     # for the new value.
     @staticmethod
     @ngjit
     def _append(x, y, agg, field, update_index):
         if agg.ndim > 2:
-            # Bump previous values along to make room for new value.
-            n = agg.shape[2]
-            for i in range(n-1, update_index, -1):
-                agg[y, x, i] = agg[y, x, i-1]
-            agg[y, x, update_index] = field
+            shift_and_insert(agg[y, x], field, update_index)
         else:
             agg[y, x] = field
         return update_index
@@ -1755,11 +1742,7 @@ def _append(x, y, agg, field, update_index):
     def _append_antialias(x, y, agg, field, aa_factor, update_index):
         # Ignore aa_factor.
         if agg.ndim > 2:
-            # Bump previous values along to make room for new value.
-            n = agg.shape[2]
-            for i in range(n-1, update_index, -1):
-                agg[y, x, i] = agg[y, x, i-1]
-            agg[y, x, update_index] = field
+            shift_and_insert(agg[y, x], field, update_index)
         else:
             agg[y, x] = field
 
@@ -1858,10 +1841,7 @@ def combine_cpu_n(aggs, selector_aggs):
                             update_index = append(x, y, selector_aggs[0][:, :, cat, :], value)
                             if update_index < 0:
                                 break
-                            # Bump values along in the same way that append() has done above.
-                            for j in range(n-1, update_index, -1):
-                                aggs[0][y, x, cat, j] = aggs[0][y, x, cat, j-1]
-                            aggs[0][y, x, cat, update_index] = aggs[1][y, x, cat, i]
+                            shift_and_insert(aggs[0][y, x, cat], aggs[1][y, x, cat, i], update_index)
 
         @nb_cuda.jit
         def combine_cuda_2d(aggs, selector_aggs):
@@ -2229,10 +2209,7 @@ def _append(x, y, agg, field):
             n = agg.shape[2]
             for i in range(n):
                 if agg[y, x, i] == -1 or field > agg[y, x, i]:
-                    # Bump previous values along to make room for new value.
-                    for j in range(n-1, i, -1):
-                        agg[y, x, j] = agg[y, x, j-1]
-                    agg[y, x, i] = field
+                    shift_and_insert(agg[y, x], field, i)
                     return i
         return -1
 
@@ -2295,10 +2272,7 @@ def _append(x, y, agg, field):
             n = agg.shape[2]
             for i in range(n):
                 if agg[y, x, i] == -1 or field < agg[y, x, i]:
-                    # Bump previous values along to make room for new value.
-                    for j in range(n-1, i, -1):
-                        agg[y, x, j] = agg[y, x, j-1]
-                    agg[y, x, i] = field
+                    shift_and_insert(agg[y, x], field, i)
                     return i
         return -1
 
diff --git a/datashader/utils.py b/datashader/utils.py
index 3f7eaab4c..982ed066d 100644
--- a/datashader/utils.py
+++ b/datashader/utils.py
@@ -648,6 +648,35 @@ def nanmin_in_place(ret, other):
             ret[i] = other[i]
 
 
+@ngjit
+def shift_and_insert(target, value, index):
+    """Insert a value into a 1D array at a particular index, but before doing
+    that shift the previous values along one to make room. For use in
+    ``FloatingNReduction`` classes such as ``max_n`` and ``first_n`` which
+    store ``n`` values per pixel.
+
+    Parameters
+    ----------
+    target : 1d numpy array
+        Target pixel array.
+
+    value : float
+        Value to insert into target pixel array.
+
+    index : int
+        Index to insert at.
+
+    Returns
+    -------
+    Index beyond insertion, i.e. where the first shifted value now sits.
+    """
+    n = len(target)
+    for i in range(n-1, index, -1):
+        target[i] = target[i-1]
+    target[index] = value
+    return index + 1
+
+
 @ngjit
 def _nanmax_n_impl(ret_pixel, other_pixel):
     """Single pixel implementation of nanmax_n_in_place.
@@ -666,11 +695,7 @@ def _nanmax_n_impl(ret_pixel, other_pixel):
         else:
             for i in range(istart, n):
                 if isnull(ret_pixel[i]) or other_value > ret_pixel[i]:
-                    # Shift values along then insert.
-                    for j in range(n-1, i, -1):
-                        ret_pixel[j] = ret_pixel[j-1]
-                    ret_pixel[i] = other_value
-                    istart = i+1
+                    istart = shift_and_insert(ret_pixel, other_value, i)
                     break
 
 
@@ -718,11 +743,7 @@ def _nanmin_n_impl(ret_pixel, other_pixel):
         else:
             for i in range(istart, n):
                 if isnull(ret_pixel[i]) or other_value < ret_pixel[i]:
-                    # Shift values along then insert.
-                    for j in range(n-1, i, -1):
-                        ret_pixel[j] = ret_pixel[j-1]
-                    ret_pixel[i] = other_value
-                    istart = i+1
+                    istart = shift_and_insert(ret_pixel, other_value, i)
                     break
 
 
@@ -820,11 +841,7 @@ def _row_max_n_impl(ret_pixel, other_pixel):
         else:
             for i in range(istart, n):
                 if ret_pixel[i] == -1 or other_value > ret_pixel[i]:
-                    # Shift values along then insert.
-                    for j in range(n-1, i, -1):
-                        ret_pixel[j] = ret_pixel[j-1]
-                    ret_pixel[i] = other_value
-                    istart = i+1
+                    istart = shift_and_insert(ret_pixel, other_value, i)
                     break
 
 
@@ -867,11 +884,7 @@ def _row_min_n_impl(ret_pixel, other_pixel):
         else:
             for i in range(istart, n):
                 if ret_pixel[i] == -1 or other_value < ret_pixel[i]:
-                    # Shift values along then insert.
-                    for j in range(n-1, i, -1):
-                        ret_pixel[j] = ret_pixel[j-1]
-                    ret_pixel[i] = other_value
-                    istart = i+1
+                    istart = shift_and_insert(ret_pixel, other_value, i)
                     break