Skip to content

Commit

Permalink
ENH: Add bincount, like np.bincount but accepts an axis parameter.
Browse files Browse the repository at this point in the history
  • Loading branch information
WarrenWeckesser committed Sep 21, 2024
1 parent 590561b commit 57052ed
Show file tree
Hide file tree
Showing 7 changed files with 305 additions and 0 deletions.
61 changes: 61 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ function to provide a capability that is not possible with a gufunc.

| Function | Description |
| -------- | ----------- |
| [`bincount`](#bincount) | Like `np.bincount`, but gufunc-based |
| [`convert_to_base`](#convert_to_base) | Convert an integer to a given base. |
| [`nextn_greater`](#nextn_greater) | Next n values greater than the given x. |
| [`nextn_less`](#nextn_less) | Next n values greater than the given x. |
Expand Down Expand Up @@ -1870,6 +1871,66 @@ Compare to
```

#### `bincount`

`bincount(x, m=None, out=None, axis=-1)` is a Python function that wraps a
gufunc with shape signature `(n)->(m)`. The function is like `np.bincount`,
but it accepts n-dimensional arrays. When the input has more than one
dimension, the operation is applied along the given axis.

```
>>> import numpy as np
>>> from ufunclab import bincount
Create an array to work with. `x` is an array with shape `(3, 12)`.
>>> rng = np.random.default_rng(121263137472525314065)
>>> x = rng.integers(0, 8, size=(3, 12))
>>> x
array([[7, 0, 5, 0, 2, 7, 7, 3, 0, 3, 4, 5],
[2, 6, 7, 1, 3, 0, 6, 1, 2, 0, 0, 6],
[0, 6, 1, 5, 2, 1, 4, 2, 6, 4, 2, 6]])
By default, `bincount` operates along the last axis. The default
value of `m` is one more than maximum value in `x`, so in this case
the output length of the counts will be 8. That is, the output
array will have shape `(3, 8)`.
>>> bincount(x)
array([[3, 0, 1, 2, 1, 2, 0, 3],
[3, 2, 2, 1, 0, 0, 3, 1],
[1, 2, 3, 0, 2, 1, 3, 0]], dtype=uint64)
If we given a value for `m` that is larger than 8, the final values
will be 0.
>>> bincount(x, 10)
array([[3, 0, 1, 2, 1, 2, 0, 3, 0, 0],
[3, 2, 2, 1, 0, 0, 3, 1, 0, 0],
[1, 2, 3, 0, 2, 1, 3, 0, 0, 0]], dtype=uint64)
If the given value of `m` is smaller than `np.max(x) + 1`, the values
greater than or equal to `m` are ignored.
>>> bincount(x, 4)
array([[3, 0, 1, 2],
[3, 2, 2, 1],
[1, 2, 3, 0]], dtype=uint64)
The `axis` parameter selects the axis of `x` along which `bincount`
is applied. In the following example, since `x` has shape `(3, 12)`,
the output has shape `(8, 12)` when `axis=0` is given.
>>> bincount(x, axis=0)
array([[1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0],
[1, 0, 0, 0, 2, 0, 0, 1, 1, 0, 1, 0],
[0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 2, 0, 0, 0, 0, 1, 0, 1, 0, 0, 2],
[1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0]], dtype=uint64)
```
#### `convert_to_base`

`convert_to_base(k, base, ndigits, out=None, axis=-1)` is a Python function
Expand Down
40 changes: 40 additions & 0 deletions src/bincount/bincount_gufunc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@

#ifndef BINCOUNT_GUFUNC_H
#define BINCOUNT_GUFUNC_H

#define PY_SSIZE_T_CLEAN
#include "Python.h"

#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#include "numpy/ndarraytypes.h"

#include "../src/util/strided.hpp"


//
// `bincount_core_calc` is the C++ core function
// for the gufunc `bincount` with signature '(n)->(m)'
//
template<typename T, typename U>
static void
bincount_core_calc(
npy_intp n, // core dimension n
npy_intp m, // core dimension m
T *p_x, // pointer to x
npy_intp x_stride,
U *p_out, // pointer to out, a strided 1-d array
npy_intp out_stride
)
{
for (npy_intp i = 0; i < m; ++i) {
set(p_out, out_stride, i, static_cast<U>(0));
}
for (npy_intp i = 0; i < n; ++i) {
T k = get(p_x, x_stride, i);
if (k >= 0 && k < m) {
set(p_out, out_stride, k, get(p_out, out_stride, k) + 1);
}
}
}

#endif
42 changes: 42 additions & 0 deletions src/bincount/define_cxx_gufunc_extmod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@

from ufunc_config_types import UFuncExtMod, UFunc, UFuncSource


BINCOUNT_DOCSTRING = """\
bincount(x, /, ...)
Count integer in `x`, a 1-d array of length n.
The `out` array *must* be given, and it must be a 1-d.
"""

bincount_src = UFuncSource(
funcname='bincount_core_calc',
typesignatures=[
'b->P',
'B->P',
'h->P',
'H->P',
'i->P',
'I->P',
'l->P',
'L->P',
'q->P',
'Q->P',
'p->P',
'P->P',
]
)

bincount = UFunc(
name='bincount',
header='bincount_gufunc.h',
docstring=BINCOUNT_DOCSTRING,
signature='(n)->(m)',
sources=[bincount_src],
)

extmod = UFuncExtMod(
module='_bincount',
docstring=("This extension module defines the gufunc 'bincount'."),
ufuncs=[bincount],
)
1 change: 1 addition & 0 deletions ufunclab/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

# The keys of this dict are in modules that are lazy-loaded.
_name_to_module = {
'bincount': '._wrapped',
'convert_to_base': '._wrapped',
'nextn_greater': '._wrapped',
'nextn_less' : '._wrapped',
Expand Down
103 changes: 103 additions & 0 deletions ufunclab/_wrapped.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,115 @@
from numpy.exceptions import AxisError
except ImportError:
from numpy import AxisError
from ufunclab._bincount import bincount as _bincount
from ufunclab._convert_to_base import convert_to_base as _convert_to_base
from ufunclab._nextn import (nextn_less as _nextn_less,
nextn_greater as _nextn_greater)
from ufunclab._one_hot import one_hot as _one_hot



def bincount(x, m=None, out=None, axis=-1):
"""
Count the number of occurrences of the positive integers in the 1-d
array `x` that are less than `m`. If `m` is not given, the default
value is `max(np.max(x) + 1, 0)`. If `x` is an n-dimensional array,
`axis` selects which axis of `x` the operation is applied to.
Notes
-----
This function is a Python wrapper of a gufunc with shape signature
`(n)->(m)`.
Examples
--------
>>> import numpy as np
>>> from ufunclab import bincount
Create an array to work with. `x` is an array with shape `(3, 12)`.
>>> rng = np.random.default_rng(121263137472525314065)
>>> x = rng.integers(0, 8, size=(3, 12))
>>> x
array([[7, 0, 5, 0, 2, 7, 7, 3, 0, 3, 4, 5],
[2, 6, 7, 1, 3, 0, 6, 1, 2, 0, 0, 6],
[0, 6, 1, 5, 2, 1, 4, 2, 6, 4, 2, 6]])
By default, `bincount` operates along the last axis. The default
value of `m` is one more than maximum value in `x`, so in this case
the output length of the counts will be 8. That is, the output
array will have shape `(3, 8)`.
>>> bincount(x)
array([[3, 0, 1, 2, 1, 2, 0, 3],
[3, 2, 2, 1, 0, 0, 3, 1],
[1, 2, 3, 0, 2, 1, 3, 0]], dtype=uint64)
If we given a value for `m` that is larger than 8, the final values
will be 0.
>>> bincount(x, 10)
array([[3, 0, 1, 2, 1, 2, 0, 3, 0, 0],
[3, 2, 2, 1, 0, 0, 3, 1, 0, 0],
[1, 2, 3, 0, 2, 1, 3, 0, 0, 0]], dtype=uint64)
If the given value of `m` is smaller than `np.max(x) + 1`, the values
greater than or equal to `m` are ignored.
>>> bincount(x, 4)
array([[3, 0, 1, 2],
[3, 2, 2, 1],
[1, 2, 3, 0]], dtype=uint64)
The `axis` parameter selects the axis of `x` along which `bincount`
is applied. In the following example, since `x` has shape `(3, 12)`,
the output has shape `(8, 12)` when `axis=0` is given.
>>> bincount(x, axis=0)
array([[1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0],
[0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0],
[1, 0, 0, 0, 2, 0, 0, 1, 1, 0, 1, 0],
[0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0],
[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 2, 0, 0, 0, 0, 1, 0, 1, 0, 0, 2],
[1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0]], dtype=uint64)
"""
x = np.asarray(x)
if x.dtype.char not in np.typecodes['AllInteger']:
raise ValueError('x must be an integer array.')
if m is None:
m = max(np.max(x) + 1, 0)
else:
try:
m = operator.index(m)
except TypeError:
raise ValueError(f'm must be a nonnegative integer; got {m!r}')
if m < 0:
raise ValueError(f'm must be a nonnegative integer; got {m!r}')

x_shape = x.shape
adjusted_axis = axis
if adjusted_axis < 0:
adjusted_axis += len(x_shape)
if adjusted_axis < 0 or adjusted_axis > len(x_shape):
raise AxisError(f'invalid axis {axis}')
out_shape = list(x_shape)
out_shape[adjusted_axis] = m
out_shape = tuple(out_shape)
if out is not None:
if out.shape != out_shape:
raise ValueError(f'out.shape must be {out_shape}; '
f'got {out.shape}.')
else:
out = np.empty(out_shape, dtype=np.uintp)
return _bincount(x, out=out, axes=[axis, axis])


bincount.gufunc = _bincount


# XXX Except for `out` and `axis`, this function does not expose any of the
# gufunc keyword parameters.
#
Expand Down
1 change: 1 addition & 0 deletions ufunclab/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ npymath_lib = cc.find_library('npymath', dirs: npymath_path)

gufunc_cxx_src_dirs = [
'all_same',
'bincount',
'backlash',
'convert_to_base',
'corr',
Expand Down
57 changes: 57 additions & 0 deletions ufunclab/tests/test_bincount.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import pytest
import numpy as np
from numpy.testing import assert_equal
from ufunclab import bincount


def test_1d_default_m():
x = np.array([0, 0, 3, 4, 3, 4, 0, 3, 0])
y = bincount(x)
assert_equal(y, [4, 0, 0, 3, 2])


def test_1d_given_m():
x = np.array([0, 0, 3, 4, 3, 4, 0, 3, 0])
y = bincount(x, 8)
assert_equal(y, [4, 0, 0, 3, 2, 0, 0, 0])


@pytest.mark.parametrize('m', [None, 2, 5, 8])
def test_nd_default_m(m):
x = np.array([[0, 4, 4, 3, 2, 1],
[1, 1, 2, 2, 3, 3],
[4, 4, 4, 4, 4, 2]])
ref = np.array([[1, 1, 1, 1, 2],
[0, 2, 2, 2, 0],
[0, 0, 1, 0, 5]])
refm = max(np.max(x) + 1, 0)
y = bincount(x, m=m)
if (m is None):
expected = ref
elif m <= refm:
expected = ref[:, :m]
else:
expected = np.pad(ref, [(0, 0), (0, m - refm)])
assert_equal(y, expected)


@pytest.mark.parametrize('m', [None, 2, 5, 8])
def test_axis(m):
x = np.array([[0, 4, 4],
[1, 1, 2],
[4, 4, 4],
[0, 0, 2]])
ref = np.array([[2, 1, 0],
[1, 1, 0],
[0, 0, 2],
[0, 0, 0],
[1, 2, 2]])
refm = max(np.max(x) + 1, 0)
y = bincount(x, m=m, axis=0)
if (m is None):
expected = ref
elif m <= refm:
expected = ref[:m, :]
else:
expected = np.pad(ref, [(0, m - refm), (0, 0)])
assert_equal(y, expected)

0 comments on commit 57052ed

Please sign in to comment.