Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support by(max_n) and by(min_n) #1229

Merged
merged 1 commit into from
Jun 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion datashader/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,8 @@ def make_append(bases, cols, calls, glyph, categorical, antialias):
# Categorical aggregate arrays need to be unpacked
if categorical:
col_index = '' if isinstance(cols[0], category_codes) else '[0]'
cat_var = 'cat = int({0}[{1}]{2})'.format(signature[-1], subscript, col_index)
signature_index = -2 if any_uses_cuda_mutex else -1
cat_var = 'cat = int({0}[{1}]{2})'.format(signature[signature_index], subscript, col_index)
aggs = ['{0} = {0}[:, :, cat]'.format(s) for s in signature[:len(calls)]]
body = [cat_var] + aggs + body

Expand Down
29 changes: 22 additions & 7 deletions datashader/reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,9 @@ def out_dshape(self, input_dshape, antialias, cuda, partitioned):
def inputs(self):
    # This reduction's sole input is its column-preprocessing step.
    return (self.preprocess, )

def uses_cuda_mutex(self):
    # Delegate to the wrapped reduction: this wrapper needs a CUDA mutex
    # exactly when its underlying reduction does (e.g. max_n/min_n).
    return self.reduction.uses_cuda_mutex()

def _antialias_requires_2_stages(self):
    # Delegate to the wrapped reduction's antialiasing requirements.
    return self.reduction._antialias_requires_2_stages()

Expand Down Expand Up @@ -684,7 +687,7 @@ def _build_finalize(self, dshape):
def finalize(bases, cuda=False, **kwargs):
kwargs['dims'] += [self.cat_column]
kwargs['coords'][self.cat_column] = cats
return self.reduction._finalize(bases, cuda=cuda, **kwargs)
return self.reduction._build_finalize(dshape)(bases, cuda=cuda, **kwargs)

return finalize

Expand Down Expand Up @@ -1474,16 +1477,22 @@ def _build_combine(self, dshape, antialias, cuda, partitioned):
@staticmethod
def _combine(aggs):
    """Combine per-partition max-n aggregates in place and return the first.

    The scraped diff retained both the pre-change call
    (``nanmax_n_in_place(ret, ...)``) and the post-change call
    (``nanmax_n_in_place(aggs[0], ...)``); only the latter is kept, since
    ``nanmax_n_in_place`` operates on 4D (ny, nx, ncat, n) arrays.
    """
    ret = aggs[0]
    if ret.ndim == 3:  # ndim is either 3 (ny, nx, n) or 4 (ny, nx, ncat, n)
        # 4d view of each agg: insert a length-1 category axis. The views
        # share memory with the originals, so in-place updates to aggs[0]
        # are visible through ret, which keeps the caller's 3D shape.
        aggs = [np.expand_dims(agg, 2) for agg in aggs]
    for i in range(1, len(aggs)):
        nanmax_n_in_place(aggs[0], aggs[i])
    return ret

@staticmethod
def _combine_cuda(aggs):
    """CUDA combine of per-partition max-n aggregates; returns the first.

    The scraped diff retained both the pre-change 2D kernel launch and the
    post-change 3D launch; only the latter is kept, since
    ``cuda_nanmax_n_in_place`` expects 4D (ny, nx, ncat, n) arrays and a
    3D grid.
    """
    ret = aggs[0]
    if ret.ndim == 3:  # ndim is either 3 (ny, nx, n) or 4 (ny, nx, ncat, n)
        # 4d view of each agg (length-1 category axis); views share memory
        # with the originals so ret reflects the in-place updates.
        aggs = [cp.expand_dims(agg, 2) for agg in aggs]
    # Grid covers (ny, nx, ncat); the n axis is walked inside the kernel.
    kernel_args = cuda_args(aggs[0].shape[:3])
    for i in range(1, len(aggs)):
        cuda_nanmax_n_in_place[kernel_args](aggs[0], aggs[i])
    return ret


Expand Down Expand Up @@ -1540,16 +1549,22 @@ def _build_combine(self, dshape, antialias, cuda, partitioned):
@staticmethod
def _combine(aggs):
    """Combine per-partition min-n aggregates in place and return the first.

    The scraped diff retained both the pre-change call
    (``nanmin_n_in_place(ret, ...)``) and the post-change call
    (``nanmin_n_in_place(aggs[0], ...)``); only the latter is kept, since
    ``nanmin_n_in_place`` operates on 4D (ny, nx, ncat, n) arrays.
    """
    ret = aggs[0]
    if ret.ndim == 3:  # ndim is either 3 (ny, nx, n) or 4 (ny, nx, ncat, n)
        # 4d view of each agg: insert a length-1 category axis. The views
        # share memory with the originals, so in-place updates to aggs[0]
        # are visible through ret, which keeps the caller's 3D shape.
        aggs = [np.expand_dims(agg, 2) for agg in aggs]
    for i in range(1, len(aggs)):
        nanmin_n_in_place(aggs[0], aggs[i])
    return ret

@staticmethod
def _combine_cuda(aggs):
    """CUDA combine of per-partition min-n aggregates; returns the first.

    The scraped diff retained both the pre-change 2D kernel launch and the
    post-change 3D launch; only the latter is kept, since
    ``cuda_nanmin_n_in_place`` expects 4D (ny, nx, ncat, n) arrays and a
    3D grid.
    """
    ret = aggs[0]
    if ret.ndim == 3:  # ndim is either 3 (ny, nx, n) or 4 (ny, nx, ncat, n)
        # 4d view of each agg (length-1 category axis); views share memory
        # with the originals so ret reflects the in-place updates.
        aggs = [cp.expand_dims(agg, 2) for agg in aggs]
    # Grid covers (ny, nx, ncat); the n axis is walked inside the kernel.
    kernel_args = cuda_args(aggs[0].shape[:3])
    for i in range(1, len(aggs)):
        cuda_nanmin_n_in_place[kernel_args](aggs[0], aggs[i])
    return ret


Expand Down
62 changes: 62 additions & 0 deletions datashader/tests/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,10 @@
'plusminus': np.arange(20, dtype='f8')*([1, -1]*10),
'empty_bin': np.array([0.] * 15 + [np.nan] * 5),
'cat': ['a']*5 + ['b']*5 + ['c']*5 + ['d']*5,
'cat2': ['a', 'b', 'c', 'd']*5,
'cat_int': np.array([10]*5 + [11]*5 + [12]*5 + [13]*5)})
df_pd.cat = df_pd.cat.astype('category')
df_pd.cat2 = df_pd.cat2.astype('category')
df_pd.at[2,'f32'] = nan
df_pd.at[2,'f64'] = nan
df_pd.at[2,'plusminus'] = nan
Expand Down Expand Up @@ -327,6 +329,66 @@ def test_last_n(ddf, npartitions):
assert_eq_ndarray(agg[:, :, 0].data, c.points(ddf, 'x', 'y', ds.last('plusminus')).data)


@pytest.mark.parametrize('ddf', ddfs)
@pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
def test_categorical_min(ddf, npartitions):
    # Per-category min: aggregate has a trailing category axis. All
    # partition counts are tested to exercise the dask combine path.
    ddf = ddf.repartition(npartitions)
    assert ddf.npartitions == npartitions
    # Solutions are float64 even for integer columns.
    sol_int = np.array([[[0, 1, 2, 3], [12, 13, 10, 11]], [[8, 5, 6, 7], [16, 17, 18, 15]]], dtype=np.float64)
    sol_float = np.array([[[0, 1, nan, 3], [12, 13, 10, 11]], [[8, 5, 6, 7], [16, 17, 18, 15]]])
    assert_eq_ndarray(c.points(ddf, 'x', 'y', ds.by('cat2', ds.min('i32'))).data, sol_int)
    assert_eq_ndarray(c.points(ddf, 'x', 'y', ds.by('cat2', ds.min('i64'))).data, sol_int)
    assert_eq_ndarray(c.points(ddf, 'x', 'y', ds.by('cat2', ds.min('f32'))).data, sol_float)
    assert_eq_ndarray(c.points(ddf, 'x', 'y', ds.by('cat2', ds.min('f64'))).data, sol_float)


@pytest.mark.parametrize('ddf', ddfs)
@pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
def test_categorical_max(ddf, npartitions):
    # Per-category max: aggregate has a trailing category axis. All
    # partition counts are tested to exercise the dask combine path.
    ddf = ddf.repartition(npartitions)
    assert ddf.npartitions == npartitions
    # Solutions are float64 even for integer columns.
    sol_int = np.array([[[4, 1, 2, 3], [12, 13, 14, 11]], [[8, 9, 6, 7], [16, 17, 18, 19]]], dtype=np.float64)
    sol_float = np.array([[[4, 1, nan, 3], [12, 13, 14, 11]], [[8, 9, 6, 7], [16, 17, 18, 19]]])
    assert_eq_ndarray(c.points(ddf, 'x', 'y', ds.by('cat2', ds.max('i32'))).data, sol_int)
    assert_eq_ndarray(c.points(ddf, 'x', 'y', ds.by('cat2', ds.max('i64'))).data, sol_int)
    assert_eq_ndarray(c.points(ddf, 'x', 'y', ds.by('cat2', ds.max('f32'))).data, sol_float)
    assert_eq_ndarray(c.points(ddf, 'x', 'y', ds.by('cat2', ds.max('f64'))).data, sol_float)


@pytest.mark.parametrize('ddf', ddfs)
@pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
def test_categorical_min_n(ddf, npartitions):
    # Per-category min_n: last axis holds up to 3 smallest values per
    # pixel/category, nan-padded when fewer exist.
    ddf = ddf.repartition(npartitions)
    assert ddf.npartitions == npartitions
    solution = np.array([[[[0, 4, nan], [1, nan, nan], [nan, nan, nan], [3, nan, nan]],
                          [[12, nan, nan], [13, nan, nan], [10, 14, nan], [11, nan, nan]]],
                         [[[8, nan, nan], [5, 9, nan], [6, nan, nan], [7, nan, nan]],
                          [[16, nan, nan], [17, nan, nan], [18, nan, nan], [15, 19, nan]]]])
    for n in range(1, 3):
        agg = c.points(ddf, 'x', 'y', ds.by('cat2', ds.min_n('f32', n=n)))
        out = solution[:, :, :, :n]
        assert_eq_ndarray(agg.data, out)
        if n == 1:
            # min_n with n=1 must agree with plain min.
            assert_eq_ndarray(agg[..., 0].data, c.points(ddf, 'x', 'y', ds.by('cat2', ds.min('f32'))).data)


@pytest.mark.parametrize('ddf', ddfs)
@pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
def test_categorical_max_n(ddf, npartitions):
    # Per-category max_n: last axis holds up to 3 largest values per
    # pixel/category, nan-padded when fewer exist.
    ddf = ddf.repartition(npartitions)
    assert ddf.npartitions == npartitions
    solution = np.array([[[[4, 0, nan], [1, nan, nan], [nan, nan, nan], [3, nan, nan]],
                          [[12, nan, nan], [13, nan, nan], [14, 10, nan], [11, nan, nan]]],
                         [[[8, nan, nan], [9, 5, nan], [6, nan, nan], [7, nan, nan]],
                          [[16, nan, nan], [17, nan, nan], [18, nan, nan], [19, 15, nan]]]])
    for n in range(1, 3):
        agg = c.points(ddf, 'x', 'y', ds.by('cat2', ds.max_n('f32', n=n)))
        out = solution[:, :, :, :n]
        assert_eq_ndarray(agg.data, out)
        if n == 1:
            # max_n with n=1 must agree with plain max.
            assert_eq_ndarray(agg[..., 0].data, c.points(ddf, 'x', 'y', ds.by('cat2', ds.max('f32'))).data)


@pytest.mark.parametrize('ddf', ddfs)
@pytest.mark.parametrize('npartitions', [1, 2, 3, 4])
def test_where_max(ddf, npartitions):
Expand Down
58 changes: 57 additions & 1 deletion datashader/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,19 @@
'plusminus': np.arange(20, dtype='f8')*([1, -1]*10),
'empty_bin': np.array([0.] * 15 + [np.nan] * 5),
'cat': ['a']*5 + ['b']*5 + ['c']*5 + ['d']*5,
'cat2': ['a', 'b', 'c', 'd']*5,
'cat_int': np.array([10]*5 + [11]*5 + [12]*5 + [13]*5)})
df_pd.cat = df_pd.cat.astype('category')
df_pd.cat2 = df_pd.cat2.astype('category')
df_pd.at[2,'f32'] = nan
df_pd.at[2,'f64'] = nan
df_pd.at[2,'plusminus'] = nan
# x 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find myself creating this manual table whenever I need to check new tests, so here including it to make it easier to check new tests in future.

# y 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 1 1 1 1 1
# i32 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
# f32 0 1 nan 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
# plusminus 0 -1 nan -3 4 -5 6 -7 8 -9 10 -11 12 -13 14 -15 16 -17 18 -19
# cat2 a b c d a b c d a b c d a b c d a b c d

test_gpu = bool(int(os.getenv("DATASHADER_TEST_GPU", 0)))

Expand Down Expand Up @@ -237,6 +245,54 @@ def test_max_n(df):
assert_eq_ndarray(agg[:, :, 0].data, c.points(df, 'x', 'y', ds.max('plusminus')).data)


@pytest.mark.parametrize('df', dfs)
def test_categorical_min(df):
    # Per-category min: aggregate has a trailing category axis.
    # Solutions are float64 even for integer columns.
    sol_int = np.array([[[0, 1, 2, 3], [12, 13, 10, 11]], [[8, 5, 6, 7], [16, 17, 18, 15]]], dtype=np.float64)
    sol_float = np.array([[[0, 1, nan, 3], [12, 13, 10, 11]], [[8, 5, 6, 7], [16, 17, 18, 15]]])
    assert_eq_ndarray(c.points(df, 'x', 'y', ds.by('cat2', ds.min('i32'))).data, sol_int)
    assert_eq_ndarray(c.points(df, 'x', 'y', ds.by('cat2', ds.min('i64'))).data, sol_int)
    assert_eq_ndarray(c.points(df, 'x', 'y', ds.by('cat2', ds.min('f32'))).data, sol_float)
    assert_eq_ndarray(c.points(df, 'x', 'y', ds.by('cat2', ds.min('f64'))).data, sol_float)


@pytest.mark.parametrize('df', dfs)
def test_categorical_max(df):
    # Per-category max: aggregate has a trailing category axis.
    # Solutions are float64 even for integer columns.
    sol_int = np.array([[[4, 1, 2, 3], [12, 13, 14, 11]], [[8, 9, 6, 7], [16, 17, 18, 19]]], dtype=np.float64)
    sol_float = np.array([[[4, 1, nan, 3], [12, 13, 14, 11]], [[8, 9, 6, 7], [16, 17, 18, 19]]])
    assert_eq_ndarray(c.points(df, 'x', 'y', ds.by('cat2', ds.max('i32'))).data, sol_int)
    assert_eq_ndarray(c.points(df, 'x', 'y', ds.by('cat2', ds.max('i64'))).data, sol_int)
    assert_eq_ndarray(c.points(df, 'x', 'y', ds.by('cat2', ds.max('f32'))).data, sol_float)
    assert_eq_ndarray(c.points(df, 'x', 'y', ds.by('cat2', ds.max('f64'))).data, sol_float)


@pytest.mark.parametrize('df', dfs)
def test_categorical_min_n(df):
    # Per-category min_n: last axis holds up to 3 smallest values per
    # pixel/category, nan-padded when fewer exist.
    solution = np.array([[[[0, 4, nan], [1, nan, nan], [nan, nan, nan], [3, nan, nan]],
                          [[12, nan, nan], [13, nan, nan], [10, 14, nan], [11, nan, nan]]],
                         [[[8, nan, nan], [5, 9, nan], [6, nan, nan], [7, nan, nan]],
                          [[16, nan, nan], [17, nan, nan], [18, nan, nan], [15, 19, nan]]]])
    for n in range(1, 3):
        agg = c.points(df, 'x', 'y', ds.by('cat2', ds.min_n('f32', n=n)))
        out = solution[:, :, :, :n]
        assert_eq_ndarray(agg.data, out)
        if n == 1:
            # min_n with n=1 must agree with plain min.
            assert_eq_ndarray(agg[..., 0].data, c.points(df, 'x', 'y', ds.by('cat2', ds.min('f32'))).data)


@pytest.mark.parametrize('df', dfs)
def test_categorical_max_n(df):
    # Per-category max_n: last axis holds up to 3 largest values per
    # pixel/category, nan-padded when fewer exist.
    solution = np.array([[[[4, 0, nan], [1, nan, nan], [nan, nan, nan], [3, nan, nan]],
                          [[12, nan, nan], [13, nan, nan], [14, 10, nan], [11, nan, nan]]],
                         [[[8, nan, nan], [9, 5, nan], [6, nan, nan], [7, nan, nan]],
                          [[16, nan, nan], [17, nan, nan], [18, nan, nan], [19, 15, nan]]]])
    for n in range(1, 3):
        agg = c.points(df, 'x', 'y', ds.by('cat2', ds.max_n('f32', n=n)))
        out = solution[:, :, :, :n]
        assert_eq_ndarray(agg.data, out)
        if n == 1:
            # max_n with n=1 must agree with plain max.
            assert_eq_ndarray(agg[..., 0].data, c.points(df, 'x', 'y', ds.by('cat2', ds.max('f32'))).data)


@pytest.mark.parametrize('df', dfs)
def test_where_min_row_index(df):
out = xr.DataArray([[0, 10], [-5, -15]], coords=coords, dims=dims)
Expand Down Expand Up @@ -652,7 +708,7 @@ def test_categorical_sum_binning(df):


@pytest.mark.parametrize('df', dfs)
def test_categorical_max(df):
def test_categorical_max2(df):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Keeping the existing categorical max test but renaming it so it doesn't overwrite the new one which is above it in this file.

sol = np.array([[[ 4, nan, nan, nan],
[nan, nan, 14, nan]],
[[nan, 9, nan, nan],
Expand Down
20 changes: 10 additions & 10 deletions datashader/transfer_functions/_cuda_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,11 +202,11 @@ def cuda_mutex_unlock(mutex, index):
def cuda_nanmax_n_in_place(ret, other):
"""CUDA equivalent of nanmax_n_in_place.
"""
ny, nx, n = ret.shape
x, y = cuda.grid(2)
if x < nx and y < ny:
ret_pixel = ret[y, x] # 1D array of n values for single pixel
other_pixel = other[y, x] # ditto
ny, nx, ncat, n = ret.shape
x, y, cat = cuda.grid(3)
if x < nx and y < ny and cat < ncat:
ret_pixel = ret[y, x, cat] # 1D array of n values for single pixel
other_pixel = other[y, x, cat] # ditto
# Walk along other_pixel array a value at a time, find insertion
# index in ret_pixel and bump values along to insert. Next
# other_pixel value is inserted at a higher index, so this walks
Expand All @@ -230,11 +230,11 @@ def cuda_nanmax_n_in_place(ret, other):
def cuda_nanmin_n_in_place(ret, other):
"""CUDA equivalent of nanmin_n_in_place.
"""
ny, nx, n = ret.shape
x, y = cuda.grid(2)
if x < nx and y < ny:
ret_pixel = ret[y, x] # 1D array of n values for single pixel
other_pixel = other[y, x] # ditto
ny, nx, ncat, n = ret.shape
x, y, cat = cuda.grid(3)
if x < nx and y < ny and cat < ncat:
ret_pixel = ret[y, x, cat] # 1D array of n values for single pixel
other_pixel = other[y, x, cat] # ditto
# Walk along other_pixel array a value at a time, find insertion
# index in ret_pixel and bump values along to insert. Next
# other_pixel value is inserted at a higher index, so this walks
Expand Down
98 changes: 52 additions & 46 deletions datashader/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -650,64 +650,70 @@ def nanmin_in_place(ret, other):

@ngjit_parallel
def nanmax_n_in_place(ret, other):
    """Combine two max-n arrays, taking nans into account.

    Max-n arrays are 4D with shape (ny, nx, ncat, n) where ny and nx are the
    number of pixels, ncat the number of categories (will be 1 if not using a
    categorical reduction) and the last axis containing n values in descending
    order. If there are fewer than n values it is padded with nans.
    Return the first array.

    Note: the scraped diff interleaved the old 3D implementation with the new
    4D one; only the 4D (post-change) implementation is kept here.
    """
    ny, nx, ncat, n = ret.shape
    for y in nb.prange(ny):  # parallel over rows
        for x in range(nx):
            for cat in range(ncat):
                ret_pixel = ret[y, x, cat]  # 1D array of n values for single pixel
                other_pixel = other[y, x, cat]  # ditto
                # Walk along other_pixel array a value at a time, find insertion
                # index in ret_pixel and bump values along to insert. Next
                # other_pixel value is inserted at a higher index, so this walks
                # the two pixel arrays just once each.
                istart = 0
                for other_value in other_pixel:
                    if isnull(other_value):
                        # nan padding marks the end of valid values.
                        break
                    else:
                        for i in range(istart, n):
                            if isnull(ret_pixel[i]) or other_value > ret_pixel[i]:
                                # Bump values along then insert.
                                for j in range(n-1, i, -1):
                                    ret_pixel[j] = ret_pixel[j-1]
                                ret_pixel[i] = other_value
                                istart = i+1
                                break


@ngjit_parallel
def nanmin_n_in_place(ret, other):
    """Combine two min-n arrays, taking nans into account.

    Min-n arrays are 4D with shape (ny, nx, ncat, n) where ny and nx are the
    number of pixels, ncat the number of categories (will be 1 if not using a
    categorical reduction) and the last axis containing n values in ascending
    order. If there are fewer than n values it is padded with nans.
    Return the first array.

    Note: the scraped diff interleaved the old 3D implementation with the new
    4D one; only the 4D (post-change) implementation is kept here.
    """
    ny, nx, ncat, n = ret.shape
    for y in nb.prange(ny):  # parallel over rows
        for x in range(nx):
            for cat in range(ncat):
                ret_pixel = ret[y, x, cat]  # 1D array of n values for single pixel
                other_pixel = other[y, x, cat]  # ditto
                # Walk along other_pixel array a value at a time, find insertion
                # index in ret_pixel and bump values along to insert. Next
                # other_pixel value is inserted at a higher index, so this walks
                # the two pixel arrays just once each.
                istart = 0
                for other_value in other_pixel:
                    if isnull(other_value):
                        # nan padding marks the end of valid values.
                        break
                    else:
                        for i in range(istart, n):
                            if isnull(ret_pixel[i]) or other_value < ret_pixel[i]:
                                # Bump values along then insert.
                                for j in range(n-1, i, -1):
                                    ret_pixel[j] = ret_pixel[j-1]
                                ret_pixel[i] = other_value
                                istart = i+1
                                break


@ngjit_parallel
Expand Down