Skip to content

Commit

Permalink
Fix an issue with empty preallocation (#113)
Browse files Browse the repository at this point in the history
* Fix issue with NumPy chain and preallocation=0

Original logic in `grow_append` was to extend data by 10% of its length.
This is a problem with the original data length is 0, since it then
never extends.
This commit amends `grow_append` to always extend by at least 10 elements.

* [NumPyBackend] Prevent ``preallocate=0`` from creating object arrays.

`grow_append` cannot know if ``preallocate = 0`` was used: it only
looks at the `rigid` value to determine how to append.
Because of this, will always fail when we use `preallocate = 0` with
tensor variables, since then the shapes of `target` and `extension`
don’t match.
A simple fix is to simply deactivate the special behavior for
`preallocate = 0`.

- This commit extends `test_growing` with a `preallocate` parameter,
  so that we test both cases where it is 0 and positive.
- We also fix `test_growing` to match the new behavior of `grow_append`
  introduced in #ea812b0, where data arrays always grow by at least 10.

* Remove trailing whitespace

---------

Co-authored-by: Alexandre René <[email protected]>
Co-authored-by: Michael Osthege <[email protected]>
  • Loading branch information
3 people committed Jun 16, 2024
1 parent b35be42 commit b9704f3
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 14 deletions.
8 changes: 4 additions & 4 deletions mcbackend/backends/numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def grow_append(
length = len(target)
if length == draw_idx:
# Grow the array by 10 %
ngrow = math.ceil(0.1 * length)
ngrow = max(10, math.ceil(0.1 * length))
if rigid[vn]:
extension = numpy.empty((ngrow,) + numpy.shape(v))
else:
Expand Down Expand Up @@ -52,7 +52,7 @@ def __init__(self, cmeta: ChainMeta, rmeta: RunMeta, *, preallocate: int) -> Non
and grow the allocated memory by 10 % when needed.
Exceptions are variables with non-rigid shapes (indicated by 0 in the shape tuple)
where the correct amount of memory cannot be pre-allocated.
In these cases, and when ``preallocate == 0`` object arrays are used.
In these cases object arrays are used.
"""
self._var_is_rigid: Dict[str, bool] = {}
self._samples: Dict[str, numpy.ndarray] = {}
Expand All @@ -68,11 +68,11 @@ def __init__(self, cmeta: ChainMeta, rmeta: RunMeta, *, preallocate: int) -> Non
for var in variables:
rigid = is_rigid(var.shape) and not var.undefined_ndim and var.dtype != "str"
rigid_dict[var.name] = rigid
if preallocate > 0 and rigid:
if rigid:
reserve = (preallocate, *var.shape)
target_dict[var.name] = numpy.empty(reserve, var.dtype)
else:
target_dict[var.name] = numpy.array([None] * preallocate)
target_dict[var.name] = numpy.array([None] * preallocate, dtype=object)

super().__init__(cmeta, rmeta)

Expand Down
30 changes: 20 additions & 10 deletions mcbackend/test_backend_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import hagelkorn
import numpy
import pytest

from mcbackend.backends.numpy import NumPyBackend, NumPyChain, NumPyRun
from mcbackend.core import RunMeta
Expand Down Expand Up @@ -43,8 +44,9 @@ def test_targets(self):
assert chain._samples["changeling"].dtype == object
pass

def test_growing(self):
imb = NumPyBackend(preallocate=15)
@pytest.mark.parametrize("preallocate", [0, 75])
def test_growing(self, preallocate):
imb = NumPyBackend(preallocate=preallocate)
rm = RunMeta(
rid=hagelkorn.random(),
variables=[
Expand All @@ -62,19 +64,27 @@ def test_growing(self):
)
run = imb.init_run(rm)
chain = run.init_chain(0)
assert chain._samples["A"].shape == (15, 2)
assert chain._samples["B"].shape == (15,)
for _ in range(22):
assert chain._samples["A"].shape == (preallocate, 2)
assert chain._samples["B"].shape == (preallocate,)
for _ in range(130):
draw = {
"A": numpy.random.uniform(size=(2,)),
"B": numpy.random.uniform(size=(random.randint(0, 10),)),
}
chain.append(draw)
# Growth: 15 → 17 → 19 → 21 → 24
assert chain._samples["A"].shape == (24, 2)
assert chain._samples["B"].shape == (24,)
assert chain.get_draws("A").shape == (22, 2)
assert chain.get_draws("B").shape == (22,)
# NB: Growth algorithm adds max(10, ceil(0.1*length))
if preallocate == 75:
# 75 → 85 → 95 → 105 → 116 → 128 → 141
assert chain._samples["A"].shape == (141, 2)
assert chain._samples["B"].shape == (141,)
elif preallocate == 0:
# 10 → 20 → ... → 90 → 100 → 110 → 121 → 134
assert chain._samples["A"].shape == (134, 2)
assert chain._samples["B"].shape == (134,)
else:
assert False, f"Missing test for {preallocate=}"
assert chain.get_draws("A").shape == (130, 2)
assert chain.get_draws("B").shape == (130,)
pass


Expand Down

0 comments on commit b9704f3

Please sign in to comment.