diff --git a/README.md b/README.md index 2c1fddf..3a8230d 100644 --- a/README.md +++ b/README.md @@ -143,6 +143,7 @@ function to provide a capability that is not possible with a gufunc. | Function | Description | | -------- | ----------- | +| [`bincount`](#bincount) | Like `np.bincount`, but gufunc-based | | [`convert_to_base`](#convert_to_base) | Convert an integer to a given base. | | [`nextn_greater`](#nextn_greater) | Next n values greater than the given x. | | [`nextn_less`](#nextn_less) | Next n values greater than the given x. | @@ -1870,6 +1871,66 @@ Compare to ``` +#### `bincount` + +`bincount(x, m=None, out=None, axis=-1)` is a Python function that wraps a +gufunc with shape signature `(n)->(m)`. The function is like `np.bincount`, +but it accepts n-dimensional arrays. When the input has more than one +dimension, the operation is applied along the given axis. + +``` +>>> import numpy as np +>>> from ufunclab import bincount + +Create an array to work with. `x` is an array with shape `(3, 12)`. + +>>> rng = np.random.default_rng(121263137472525314065) +>>> x = rng.integers(0, 8, size=(3, 12)) +>>> x +array([[7, 0, 5, 0, 2, 7, 7, 3, 0, 3, 4, 5], + [2, 6, 7, 1, 3, 0, 6, 1, 2, 0, 0, 6], + [0, 6, 1, 5, 2, 1, 4, 2, 6, 4, 2, 6]]) + +By default, `bincount` operates along the last axis. The default +value of `m` is one more than maximum value in `x`, so in this case +the output length of the counts will be 8. That is, the output +array will have shape `(3, 8)`. + +>>> bincount(x) +array([[3, 0, 1, 2, 1, 2, 0, 3], + [3, 2, 2, 1, 0, 0, 3, 1], + [1, 2, 3, 0, 2, 1, 3, 0]], dtype=uint64) + +If we given a value for `m` that is larger than 8, the final values +will be 0. + +>>> bincount(x, 10) +array([[3, 0, 1, 2, 1, 2, 0, 3, 0, 0], + [3, 2, 2, 1, 0, 0, 3, 1, 0, 0], + [1, 2, 3, 0, 2, 1, 3, 0, 0, 0]], dtype=uint64) + +If the given value of `m` is smaller than `np.max(x) + 1`, the values +greater than or equal to `m` are ignored. + +>>> bincount(x, 4) +array([[3, 0, 1, 2], + [3, 2, 2, 1], + [1, 2, 3, 0]], dtype=uint64) + +The `axis` parameter selects the axis of `x` along which `bincount` +is applied. In the following example, since `x` has shape `(3, 12)`, +the output has shape `(8, 12)` when `axis=0` is given. + +>>> bincount(x, axis=0) +array([[1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0], + [0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0], + [1, 0, 0, 0, 2, 0, 0, 1, 1, 0, 1, 0], + [0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0], + [0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1], + [0, 2, 0, 0, 0, 0, 1, 0, 1, 0, 0, 2], + [1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0]], dtype=uint64) +``` #### `convert_to_base` `convert_to_base(k, base, ndigits, out=None, axis=-1)` is a Python function diff --git a/src/bincount/bincount_gufunc.h b/src/bincount/bincount_gufunc.h new file mode 100644 index 0000000..5c9c309 --- /dev/null +++ b/src/bincount/bincount_gufunc.h @@ -0,0 +1,40 @@ + +#ifndef BINCOUNT_GUFUNC_H +#define BINCOUNT_GUFUNC_H + +#define PY_SSIZE_T_CLEAN +#include "Python.h" + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#include "numpy/ndarraytypes.h" + +#include "../src/util/strided.hpp" + + +// +// `bincount_core_calc` is the C++ core function +// for the gufunc `bincount` with signature '(n)->(m)' +// +template +static void +bincount_core_calc( + npy_intp n, // core dimension n + npy_intp m, // core dimension m + T *p_x, // pointer to x + npy_intp x_stride, + U *p_out, // pointer to out, a strided 1-d array + npy_intp out_stride +) +{ + for (npy_intp i = 0; i < m; ++i) { + set(p_out, out_stride, i, static_cast(0)); + } + for (npy_intp i = 0; i < n; ++i) { + T k = get(p_x, x_stride, i); + if (k >= 0 && k < m) { + set(p_out, out_stride, k, get(p_out, out_stride, k) + 1); + } + } +} + +#endif diff --git a/src/bincount/define_cxx_gufunc_extmod.py b/src/bincount/define_cxx_gufunc_extmod.py new file mode 100644 index 0000000..865bdb7 --- /dev/null +++ b/src/bincount/define_cxx_gufunc_extmod.py @@ -0,0 +1,42 @@ + +from ufunc_config_types import UFuncExtMod, UFunc, UFuncSource + + +BINCOUNT_DOCSTRING = """\ +bincount(x, /, ...) + +Count integer in `x`, a 1-d array of length n. +The `out` array *must* be given, and it must be a 1-d. +""" + +bincount_src = UFuncSource( + funcname='bincount_core_calc', + typesignatures=[ + 'b->P', + 'B->P', + 'h->P', + 'H->P', + 'i->P', + 'I->P', + 'l->P', + 'L->P', + 'q->P', + 'Q->P', + 'p->P', + 'P->P', + ] +) + +bincount = UFunc( + name='bincount', + header='bincount_gufunc.h', + docstring=BINCOUNT_DOCSTRING, + signature='(n)->(m)', + sources=[bincount_src], +) + +extmod = UFuncExtMod( + module='_bincount', + docstring=("This extension module defines the gufunc 'bincount'."), + ufuncs=[bincount], +) diff --git a/ufunclab/__init__.py b/ufunclab/__init__.py index 251c6f8..ddb76c8 100644 --- a/ufunclab/__init__.py +++ b/ufunclab/__init__.py @@ -17,6 +17,7 @@ # The keys of this dict are in modules that are lazy-loaded. _name_to_module = { + 'bincount': '._wrapped', 'convert_to_base': '._wrapped', 'nextn_greater': '._wrapped', 'nextn_less' : '._wrapped', diff --git a/ufunclab/_wrapped.py b/ufunclab/_wrapped.py index ccaeae0..75f88d1 100644 --- a/ufunclab/_wrapped.py +++ b/ufunclab/_wrapped.py @@ -8,12 +8,115 @@ from numpy.exceptions import AxisError except ImportError: from numpy import AxisError +from ufunclab._bincount import bincount as _bincount from ufunclab._convert_to_base import convert_to_base as _convert_to_base from ufunclab._nextn import (nextn_less as _nextn_less, nextn_greater as _nextn_greater) from ufunclab._one_hot import one_hot as _one_hot + +def bincount(x, m=None, out=None, axis=-1): + """ + Count the number of occurrences of the positive integers in the 1-d + array `x` that are less than `m`. If `m` is not given, the default + value is `max(np.max(x) + 1, 0)`. If `x` is an n-dimensional array, + `axis` selects which axis of `x` the operation is applied to. + + Notes + ----- + This function is a Python wrapper of a gufunc with shape signature + `(n)->(m)`. + + Examples + -------- + >>> import numpy as np + >>> from ufunclab import bincount + + Create an array to work with. `x` is an array with shape `(3, 12)`. + + >>> rng = np.random.default_rng(121263137472525314065) + >>> x = rng.integers(0, 8, size=(3, 12)) + >>> x + array([[7, 0, 5, 0, 2, 7, 7, 3, 0, 3, 4, 5], + [2, 6, 7, 1, 3, 0, 6, 1, 2, 0, 0, 6], + [0, 6, 1, 5, 2, 1, 4, 2, 6, 4, 2, 6]]) + + By default, `bincount` operates along the last axis. The default + value of `m` is one more than maximum value in `x`, so in this case + the output length of the counts will be 8. That is, the output + array will have shape `(3, 8)`. + + >>> bincount(x) + array([[3, 0, 1, 2, 1, 2, 0, 3], + [3, 2, 2, 1, 0, 0, 3, 1], + [1, 2, 3, 0, 2, 1, 3, 0]], dtype=uint64) + + If we given a value for `m` that is larger than 8, the final values + will be 0. + + >>> bincount(x, 10) + array([[3, 0, 1, 2, 1, 2, 0, 3, 0, 0], + [3, 2, 2, 1, 0, 0, 3, 1, 0, 0], + [1, 2, 3, 0, 2, 1, 3, 0, 0, 0]], dtype=uint64) + + If the given value of `m` is smaller than `np.max(x) + 1`, the values + greater than or equal to `m` are ignored. + + >>> bincount(x, 4) + array([[3, 0, 1, 2], + [3, 2, 2, 1], + [1, 2, 3, 0]], dtype=uint64) + + The `axis` parameter selects the axis of `x` along which `bincount` + is applied. In the following example, since `x` has shape `(3, 12)`, + the output has shape `(8, 12)` when `axis=0` is given. + + >>> bincount(x, axis=0) + array([[1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0], + [0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0], + [1, 0, 0, 0, 2, 0, 0, 1, 1, 0, 1, 0], + [0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0], + [0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1], + [0, 2, 0, 0, 0, 0, 1, 0, 1, 0, 0, 2], + [1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0]], dtype=uint64) + + """ + x = np.asarray(x) + if x.dtype.char not in np.typecodes['AllInteger']: + raise ValueError('x must be an integer array.') + if m is None: + m = max(np.max(x) + 1, 0) + else: + try: + m = operator.index(m) + except TypeError: + raise ValueError(f'm must be a nonnegative integer; got {m!r}') + if m < 0: + raise ValueError(f'm must be a nonnegative integer; got {m!r}') + + x_shape = x.shape + adjusted_axis = axis + if adjusted_axis < 0: + adjusted_axis += len(x_shape) + if adjusted_axis < 0 or adjusted_axis > len(x_shape): + raise AxisError(f'invalid axis {axis}') + out_shape = list(x_shape) + out_shape[adjusted_axis] = m + out_shape = tuple(out_shape) + if out is not None: + if out.shape != out_shape: + raise ValueError(f'out.shape must be {out_shape}; ' + f'got {out.shape}.') + else: + out = np.empty(out_shape, dtype=np.uintp) + return _bincount(x, out=out, axes=[axis, axis]) + + +bincount.gufunc = _bincount + + # XXX Except for `out` and `axis`, this function does not expose any of the # gufunc keyword parameters. # diff --git a/ufunclab/meson.build b/ufunclab/meson.build index 881a221..9e6e0ae 100644 --- a/ufunclab/meson.build +++ b/ufunclab/meson.build @@ -47,6 +47,7 @@ npymath_lib = cc.find_library('npymath', dirs: npymath_path) gufunc_cxx_src_dirs = [ 'all_same', + 'bincount', 'backlash', 'convert_to_base', 'corr', diff --git a/ufunclab/tests/test_bincount.py b/ufunclab/tests/test_bincount.py new file mode 100644 index 0000000..5078536 --- /dev/null +++ b/ufunclab/tests/test_bincount.py @@ -0,0 +1,57 @@ +import pytest +import numpy as np +from numpy.testing import assert_equal +from ufunclab import bincount + + +def test_1d_default_m(): + x = np.array([0, 0, 3, 4, 3, 4, 0, 3, 0]) + y = bincount(x) + assert_equal(y, [4, 0, 0, 3, 2]) + + +def test_1d_given_m(): + x = np.array([0, 0, 3, 4, 3, 4, 0, 3, 0]) + y = bincount(x, 8) + assert_equal(y, [4, 0, 0, 3, 2, 0, 0, 0]) + + +@pytest.mark.parametrize('m', [None, 2, 5, 8]) +def test_nd_default_m(m): + x = np.array([[0, 4, 4, 3, 2, 1], + [1, 1, 2, 2, 3, 3], + [4, 4, 4, 4, 4, 2]]) + ref = np.array([[1, 1, 1, 1, 2], + [0, 2, 2, 2, 0], + [0, 0, 1, 0, 5]]) + refm = max(np.max(x) + 1, 0) + y = bincount(x, m=m) + if (m is None): + expected = ref + elif m <= refm: + expected = ref[:, :m] + else: + expected = np.pad(ref, [(0, 0), (0, m - refm)]) + assert_equal(y, expected) + + +@pytest.mark.parametrize('m', [None, 2, 5, 8]) +def test_axis(m): + x = np.array([[0, 4, 4], + [1, 1, 2], + [4, 4, 4], + [0, 0, 2]]) + ref = np.array([[2, 1, 0], + [1, 1, 0], + [0, 0, 2], + [0, 0, 0], + [1, 2, 2]]) + refm = max(np.max(x) + 1, 0) + y = bincount(x, m=m, axis=0) + if (m is None): + expected = ref + elif m <= refm: + expected = ref[:m, :] + else: + expected = np.pad(ref, [(0, m - refm), (0, 0)]) + assert_equal(y, expected)