diff --git a/tinybrain/accelerated.hpp b/tinybrain/accelerated.hpp index 800c579..148d2cd 100644 --- a/tinybrain/accelerated.hpp +++ b/tinybrain/accelerated.hpp @@ -190,7 +190,7 @@ template U* accumulate_2x2( T* channel, const size_t sx, const size_t sy, - const size_t sz = 1, const size_t sw = 1 + const size_t sz = 1, const size_t sw = 1, const size_t sv = 1 ) { const size_t sxy = sx * sy; @@ -198,7 +198,7 @@ U* accumulate_2x2( const size_t osx = (sx + 1) >> 1; const size_t osy = (sy + 1) >> 1; const size_t osxy = osx * osy; - const size_t ovoxels = osxy * sz * sw; + const size_t ovoxels = osxy * sz * sw * sv; const bool odd_y = (sy & 0x01); @@ -207,40 +207,42 @@ U* accumulate_2x2( size_t y, oy; size_t zoff, ozoff, oyoff; - for (size_t w = 0; w < sw; w++) { - for (size_t z = 0; z < sz; z++) { - zoff = sxy * (z + sz * w); - ozoff = osxy * (z + sz * w); - - for (y = 0, oy = 0; y < sy - (size_t)odd_y; y++, oy++) { - accumulate_x_pass( - channel, accum, - sx, osx, - (sx * y + zoff), (osx * oy + ozoff) - ); - - y++; - - accumulate_x_pass( - channel, accum, - sx, osx, - (sx * y + zoff), (osx * oy + ozoff) - ); - } + for (size_t v = 0; v < sv; v++) { + for (size_t w = 0; w < sw; w++) { + for (size_t z = 0; z < sz; z++) { + zoff = sxy * (z + sz * (w + sw * v)); + ozoff = osxy * (z + sz * (w + sw * v)); - if (odd_y) { - y = sy - 1; - oy = osy - 1; - oyoff = (osx * oy + ozoff); - accumulate_x_pass( - channel, accum, - sx, osx, - (sx * y + zoff), oyoff - ); - - // double values to prevent darkening - for (size_t x = 0; x < osx; x++) { - accum[x + oyoff] *= 2; + for (y = 0, oy = 0; y < sy - (size_t)odd_y; y++, oy++) { + accumulate_x_pass( + channel, accum, + sx, osx, + (sx * y + zoff), (osx * oy + ozoff) + ); + + y++; + + accumulate_x_pass( + channel, accum, + sx, osx, + (sx * y + zoff), (osx * oy + ozoff) + ); + } + + if (odd_y) { + y = sy - 1; + oy = osy - 1; + oyoff = (osx * oy + ozoff); + accumulate_x_pass( + channel, accum, + sx, osx, + (sx * y + zoff), oyoff + ); + + // double values to prevent darkening + for (size_t x = 0; x < osx; x++) { + accum[x + oyoff] *= 2; + } } } } @@ -253,7 +255,7 @@ template T* accumulate_2x2f( T* channel, const size_t sx, const size_t sy, - const size_t sz = 1, const size_t sw = 1 + const size_t sz = 1, const size_t sw = 1, const size_t sv = 1 ) { const size_t sxy = sx * sy; @@ -261,7 +263,7 @@ T* accumulate_2x2f( const size_t osx = (sx + 1) >> 1; const size_t osy = (sy + 1) >> 1; const size_t osxy = osx * osy; - const size_t ovoxels = osxy * sz * sw; + const size_t ovoxels = osxy * sz * sw * sv; const bool odd_y = (sy & 0x01); @@ -270,33 +272,35 @@ T* accumulate_2x2f( size_t y, oy; size_t zoff, ozoff, oyoff; - for (size_t w = 0; w < sw; w++) { - for (size_t z = 0; z < sz; z++) { - zoff = sxy * (z + sz * w); - ozoff = osxy * (z + sz * w); - - for (y = 0, oy = 0; y < sy - (size_t)odd_y; y += 2, oy++) { - accumulate_2x2_dual_pass( - channel, accum, - sx, sy, - osx, osy, - (sx * y + zoff), (osx * oy + ozoff) - ); - } + for (size_t v = 0; v < sv; v++) { + for (size_t w = 0; w < sw; w++) { + for (size_t z = 0; z < sz; z++) { + zoff = sxy * (z + sz * (w + sw * v)); + ozoff = osxy * (z + sz * (w + sw * v)); - if (odd_y) { - y = sy - 1; - oy = osy - 1; - oyoff = (osx * oy + ozoff); - accumulate_x_pass( - channel, accum, - sx, osx, - (sx * y + zoff), oyoff - ); - - // double values to prevent darkening - for (size_t x = 0; x < osx; x++) { - accum[x + oyoff] *= 2; + for (y = 0, oy = 0; y < sy - (size_t)odd_y; y += 2, oy++) { + accumulate_2x2_dual_pass( + channel, accum, + sx, sy, + osx, osy, + (sx * y + zoff), (osx * oy + ozoff) + ); + } + + if (odd_y) { + y = sy - 1; + oy = osy - 1; + oyoff = (osx * oy + ozoff); + accumulate_x_pass( + channel, accum, + sx, osx, + (sx * y + zoff), oyoff + ); + + // double values to prevent darkening + for (size_t x = 0; x < osx; x++) { + accum[x + oyoff] *= 2; + } } } } @@ -311,7 +315,7 @@ template T* _average_pooling_2x2_single_mip( const T* channel, const size_t sx, const size_t sy, - const size_t sz, const size_t sw = 1, + const size_t sz, const size_t sw = 1, const size_t sv = 1, T* out = NULL, const bool sparse = false ) { using T2 = typename PromotedType::type; // e.g. uint8_t -> uint16_t @@ -321,7 +325,7 @@ T* _average_pooling_2x2_single_mip( const size_t osx = (sx + 1) >> 1; const size_t osy = (sy + 1) >> 1; const size_t osxy = osx * osy; - const size_t ovoxels = osxy * sz * sw; + const size_t ovoxels = osxy * sz * sw * sv; const size_t odd_x = static_cast(sx & 0x01); const size_t odd_y = static_cast(sy & 0x01); @@ -333,107 +337,109 @@ T* _average_pooling_2x2_single_mip( size_t x, ox, y, oy; size_t zoff, ozoff; - for (size_t w = 0; w < sw; w++) { - for (size_t z = 0; z < sz; z++) { - zoff = sxy * (z + sz * w); - ozoff = osxy * (z + sz * w); - - if (sparse) { - for (y = 0, oy = 0; y < sy - odd_y; y += 2, oy++) { - for (x = 0, ox = 0; x < sx - odd_x; x += 2, ox++) { - T a = channel[x + sx * y + zoff]; - T b = channel[(x+1) + sx * y + zoff]; - T c = channel[x + sx * (y+1) + zoff]; - T d = channel[(x+1) + sx * (y+1) + zoff]; + for (size_t v = 0; v < sv; v++) { + for (size_t w = 0; w < sw; w++) { + for (size_t z = 0; z < sz; z++) { + zoff = sxy * (z + sz * (w + sw * v)); + ozoff = osxy * (z + sz * (w + sw * v)); - out[ox + osx * oy + ozoff] = static_cast( - ( - static_cast(a) - + static_cast(b) - + static_cast(c) - + static_cast(d) - ) / static_cast( + if (sparse) { + for (y = 0, oy = 0; y < sy - odd_y; y += 2, oy++) { + for (x = 0, ox = 0; x < sx - odd_x; x += 2, ox++) { + T a = channel[x + sx * y + zoff]; + T b = channel[(x+1) + sx * y + zoff]; + T c = channel[x + sx * (y+1) + zoff]; + T d = channel[(x+1) + sx * (y+1) + zoff]; + + out[ox + osx * oy + ozoff] = static_cast( + ( + static_cast(a) + + static_cast(b) + + static_cast(c) + + static_cast(d) + ) / static_cast( + std::max( + static_cast((a > 0) + (b > 0) + (c > 0) + (d > 0)), + static_cast(1) + ) + ) + ); + } + + if (odd_x) { + T a = channel[x + sx * y + zoff]; + T b = channel[x + sx * (y+1) + zoff]; + out[(ox - 1) + osx * oy + ozoff] = static_cast( + ( + static_cast(a) + + static_cast(b) + ) / static_cast( std::max( - static_cast((a > 0) + (b > 0) + (c > 0) + (d > 0)), + static_cast((a > 0) + (b > 0)), static_cast(1) - ) ) - ); + )); + } } + } + else { + for (y = 0, oy = 0; y < sy - odd_y; y += 2, oy++) { + for (x = 0, ox = 0; x < sx - odd_x; x += 2, ox++) { + out[ox + osx * oy + ozoff] = static_cast( + ( + static_cast(channel[x + sx * y + zoff]) + + static_cast(channel[(x+1) + sx * y + zoff]) + + static_cast(channel[x + sx * (y+1) + zoff]) + + static_cast(channel[(x+1) + sx * (y+1) + zoff]) + ) / 4 + ); + } + + if (odd_x) { + out[(ox - 1) + osx * oy + ozoff] = static_cast( + ( + static_cast(channel[x + sx * y + zoff]) + + static_cast(channel[x + sx * (y+1) + zoff]) + ) / 2 + ); + } + } + } - if (odd_x) { - T a = channel[x + sx * y + zoff]; - T b = channel[x + sx * (y+1) + zoff]; - out[(ox - 1) + osx * oy + ozoff] = static_cast( - ( - static_cast(a) - + static_cast(b) - ) / static_cast( - std::max( - static_cast((a > 0) + (b > 0)), - static_cast(1) - ) - )); + if (odd_y) { + y = sy - 1; + oy = osy - 1; + + if (sparse) { + for (x = 0, ox = 0; x < sx - odd_x; x += 2, ox++) { + out[ox + osx * oy + ozoff] = static_cast( + ( + static_cast(channel[x + sx * y + zoff]) + + static_cast(channel[(x+1) + sx * y + zoff]) + ) / 2 + ); + } } - } - } - else { - for (y = 0, oy = 0; y < sy - odd_y; y += 2, oy++) { - for (x = 0, ox = 0; x < sx - odd_x; x += 2, ox++) { - out[ox + osx * oy + ozoff] = static_cast( - ( - static_cast(channel[x + sx * y + zoff]) - + static_cast(channel[(x+1) + sx * y + zoff]) - + static_cast(channel[x + sx * (y+1) + zoff]) - + static_cast(channel[(x+1) + sx * (y+1) + zoff]) - ) / 4 - ); + else { + T a = channel[x + sx * y + zoff]; + T b = channel[(x+1) + sx * y + zoff]; + for (x = 0, ox = 0; x < sx - odd_x; x += 2, ox++) { + out[ox + osx * oy + ozoff] = static_cast( + ( + static_cast(a) + + static_cast(b) + ) / static_cast(std::max( + static_cast((a > 0) + (b > 0)), + static_cast(1) + )) + ); + } } if (odd_x) { - out[(ox - 1) + osx * oy + ozoff] = static_cast( - ( - static_cast(channel[x + sx * y + zoff]) - + static_cast(channel[x + sx * (y+1) + zoff]) - ) / 2 - ); - } - } - } - - if (odd_y) { - y = sy - 1; - oy = osy - 1; - - if (sparse) { - for (x = 0, ox = 0; x < sx - odd_x; x += 2, ox++) { - out[ox + osx * oy + ozoff] = static_cast( - ( - static_cast(channel[x + sx * y + zoff]) - + static_cast(channel[(x+1) + sx * y + zoff]) - ) / 2 - ); + out[(ox - 1) + osx * oy + ozoff] = channel[x + sx * y + zoff]; // corner } } - else { - T a = channel[x + sx * y + zoff]; - T b = channel[(x+1) + sx * y + zoff]; - for (x = 0, ox = 0; x < sx - odd_x; x += 2, ox++) { - out[ox + osx * oy + ozoff] = static_cast( - ( - static_cast(a) - + static_cast(b) - ) / static_cast(std::max( - static_cast((a > 0) + (b > 0)), - static_cast(1) - )) - ); - } - } - - if (odd_x) { - out[(ox - 1) + osx * oy + ozoff] = channel[x + sx * y + zoff]; // corner - } } } } @@ -447,7 +453,7 @@ template U* accumulate_2x2x2( T* channel, const size_t sx, const size_t sy, - const size_t sz, const size_t sw = 1 + const size_t sz, const size_t sw = 1, const size_t sv = 1 ) { const size_t sxy = sx * sy; @@ -456,7 +462,7 @@ U* accumulate_2x2x2( const size_t osy = (sy + 1) >> 1; const size_t osz = (sz + 1) >> 1; const size_t osxy = osx * osy; - const size_t ovoxels = osxy * osz * sw; + const size_t ovoxels = osxy * osz * sw * sv; const bool odd_y = (sy & 0x01); const bool odd_z = (sz & 0x01); @@ -467,12 +473,58 @@ U* accumulate_2x2x2( size_t z, oz; size_t zoff, ozoff, oyoff; - for (size_t w = 0; w < sw; w++) { - for (z = 0, oz = 0; z < sz - (size_t)odd_z; oz++) { - ozoff = osxy * (oz + osz * w); - - for (size_t zz = 0; zz < 2 && z < sz - (size_t)odd_z; zz++, z++) { - zoff = sxy * (z + sz * w); + for (size_t v = 0; v < sv; v++) { + for (size_t w = 0; w < sw; w++) { + for (z = 0, oz = 0; z < sz - (size_t)odd_z; oz++) { + ozoff = osxy * (oz + osz * (w + sw * v)); + + for (size_t zz = 0; zz < 2 && z < sz - (size_t)odd_z; zz++, z++) { + zoff = sxy * (z + sz * (w + sw * v)); + + for (y = 0, oy = 0; y < sy - (size_t)odd_y; y++, oy++) { + accumulate_x_pass( + channel, accum, + sx, osx, + (sx * y + zoff), (osx * oy + ozoff) + ); + + y++; + + accumulate_x_pass( + channel, accum, + sx, osx, + (sx * y + zoff), (osx * oy + ozoff) + ); + } + + if (odd_y) { + y = sy - 1; + oy = osy - 1; + oyoff = (osx * oy + ozoff); + accumulate_x_pass( + channel, accum, + sx, osx, + (sx * y + zoff), oyoff + ); + } + } + + if (odd_y) { + y = sy - 1; + oy = osy - 1; + oyoff = (osx * oy + ozoff); + // double values to prevent darkening + for (size_t x = 0; x < osx; x++) { + accum[x + oyoff] *= 2; + } + } + } + + if (odd_z) { + z = sz - 1; + oz = osz - 1; + ozoff = osxy * (oz + osz * (w + sw * v)); + zoff = sxy * (z + sz * (w + sw * v)); for (y = 0, oy = 0; y < sy - (size_t)odd_y; y++, oy++) { accumulate_x_pass( @@ -499,62 +551,18 @@ U* accumulate_2x2x2( sx, osx, (sx * y + zoff), oyoff ); + + // double values to prevent darkening + for (size_t x = 0; x < osx; x++) { + accum[x + oyoff] *= 2; + } } - } - - if (odd_y) { - y = sy - 1; - oy = osy - 1; - oyoff = (osx * oy + ozoff); - // double values to prevent darkening - for (size_t x = 0; x < osx; x++) { - accum[x + oyoff] *= 2; - } - } - } - if (odd_z) { - z = sz - 1; - oz = osz - 1; - ozoff = osxy * (oz + osz * w); - zoff = sxy * (z + sz * w); - - for (y = 0, oy = 0; y < sy - (size_t)odd_y; y++, oy++) { - accumulate_x_pass( - channel, accum, - sx, osx, - (sx * y + zoff), (osx * oy + ozoff) - ); - - y++; - - accumulate_x_pass( - channel, accum, - sx, osx, - (sx * y + zoff), (osx * oy + ozoff) - ); - } - - if (odd_y) { - y = sy - 1; - oy = osy - 1; - oyoff = (osx * oy + ozoff); - accumulate_x_pass( - channel, accum, - sx, osx, - (sx * y + zoff), oyoff - ); - // double values to prevent darkening - for (size_t x = 0; x < osx; x++) { - accum[x + oyoff] *= 2; + for (size_t i = 0; i < osxy; i++) { + accum[i + ozoff] *= 2; } } - - // double values to prevent darkening - for (size_t i = 0; i < osxy; i++) { - accum[i + ozoff] *= 2; - } } } @@ -565,7 +573,7 @@ template T* _average_pooling_2x2x2_single_mip( const T* channel, const size_t sx, const size_t sy, - const size_t sz, const size_t sw = 1, + const size_t sz, const size_t sw = 1, const size_t sv = 1, T* out = NULL ) { using T2 = typename PromotedType::type; // e.g. uint8_t -> uint16_t @@ -576,7 +584,7 @@ T* _average_pooling_2x2x2_single_mip( const size_t osy = (sy + 1) >> 1; const size_t osz = (sz + 1) >> 1; const size_t osxy = osx * osy; - const size_t ovoxels = osxy * sz * sw; + const size_t ovoxels = osxy * sz * sw * sv; const size_t odd_x = static_cast(sx & 0x01); const size_t odd_y = static_cast(sy & 0x01); @@ -589,107 +597,109 @@ T* _average_pooling_2x2x2_single_mip( size_t x, ox, y, oy, z, oz; size_t zoff, ozoff; - for (size_t w = 0; w < sw; w++) { - for (z = 0, oz = 0; z < sz - odd_z; z += 2, oz++) { - zoff = sxy * (z + sz * w); - ozoff = osxy * (oz + osz * w); - - for (y = 0, oy = 0; y < sy - odd_y; y += 2, oy++) { - for (x = 0, ox = 0; x < sx - odd_x; x += 2, ox++) { - out[ox + osx * oy + ozoff] = static_cast( - ( - static_cast(channel[x + sx * y + zoff]) - + static_cast(channel[(x+1) + sx * y + zoff]) - + static_cast(channel[x + sx * (y+1) + zoff]) - + static_cast(channel[(x+1) + sx * (y+1) + zoff]) - + static_cast(channel[x + sx * y + zoff + sxy]) - + static_cast(channel[(x+1) + sx * y + zoff + sxy]) - + static_cast(channel[x + sx * (y+1) + zoff + sxy]) - + static_cast(channel[(x+1) + sx * (y+1) + zoff + sxy]) - ) / 8 - ); - } + for (size_t v = 0; v < sv; v++) { + for (size_t w = 0; w < sw; w++) { + for (z = 0, oz = 0; z < sz - odd_z; z += 2, oz++) { + zoff = sxy * (z + sz * (w + sw * v)); + ozoff = osxy * (oz + osz * (w + sw * v)); - if (odd_x) { - x = sx - 1; - ox = osx - 1; - out[ox + osx * oy + ozoff] = static_cast( - ( - static_cast(channel[x + sx * y + zoff]) - + static_cast(channel[x + sx * (y+1) + zoff]) - + static_cast(channel[x + sx * y + zoff + sxy]) - + static_cast(channel[x + sx * (y+1) + zoff + sxy]) - ) / 4 - ); + for (y = 0, oy = 0; y < sy - odd_y; y += 2, oy++) { + for (x = 0, ox = 0; x < sx - odd_x; x += 2, ox++) { + out[ox + osx * oy + ozoff] = static_cast( + ( + static_cast(channel[x + sx * y + zoff]) + + static_cast(channel[(x+1) + sx * y + zoff]) + + static_cast(channel[x + sx * (y+1) + zoff]) + + static_cast(channel[(x+1) + sx * (y+1) + zoff]) + + static_cast(channel[x + sx * y + zoff + sxy]) + + static_cast(channel[(x+1) + sx * y + zoff + sxy]) + + static_cast(channel[x + sx * (y+1) + zoff + sxy]) + + static_cast(channel[(x+1) + sx * (y+1) + zoff + sxy]) + ) / 8 + ); + } + + if (odd_x) { + x = sx - 1; + ox = osx - 1; + out[ox + osx * oy + ozoff] = static_cast( + ( + static_cast(channel[x + sx * y + zoff]) + + static_cast(channel[x + sx * (y+1) + zoff]) + + static_cast(channel[x + sx * y + zoff + sxy]) + + static_cast(channel[x + sx * (y+1) + zoff + sxy]) + ) / 4 + ); + } } - } - if (odd_y) { - y = sy - 1; - oy = osy - 1; + if (odd_y) { + y = sy - 1; + oy = osy - 1; - for (x = 0, ox = 0; x < sx - odd_x; x += 2, ox++) { - out[ox + osx * oy + ozoff] = static_cast( - ( - static_cast(channel[x + sx * y + zoff]) - + static_cast(channel[(x+1) + sx * y + zoff]) - + static_cast(channel[x + sx * y + zoff + sxy]) - + static_cast(channel[(x+1) + sx * y + zoff + sxy]) - ) / 4 - ); - } + for (x = 0, ox = 0; x < sx - odd_x; x += 2, ox++) { + out[ox + osx * oy + ozoff] = static_cast( + ( + static_cast(channel[x + sx * y + zoff]) + + static_cast(channel[(x+1) + sx * y + zoff]) + + static_cast(channel[x + sx * y + zoff + sxy]) + + static_cast(channel[(x+1) + sx * y + zoff + sxy]) + ) / 4 + ); + } - if (odd_x) { - out[(osx - 1) + osx * oy + ozoff] = ( - channel[(sx-1) + sx * (sy-1) + zoff] - + channel[(sx-1) + sx * (sy-1) + zoff + sxy] // corner - ) / 2; + if (odd_x) { + out[(osx - 1) + osx * oy + ozoff] = ( + channel[(sx-1) + sx * (sy-1) + zoff] + + channel[(sx-1) + sx * (sy-1) + zoff + sxy] // corner + ) / 2; + } } } - } - if (odd_z) { - z = sz - 1; - zoff = sxy * (z + sz * w); - ozoff = osxy * ((osz - 1) + osz * w); - - for (y = 0, oy = 0; y < sy - odd_y; y += 2, oy++) { - for (x = 0, ox = 0; x < sx - odd_x; x += 2, ox++) { - out[ox + osx * oy + ozoff] = static_cast( - ( - static_cast(channel[x + sx * y + zoff]) - + static_cast(channel[(x+1) + sx * y + zoff]) - + static_cast(channel[x + sx * (y+1) + zoff]) - + static_cast(channel[(x+1) + sx * (y+1) + zoff]) - ) / 4 - ); - } + if (odd_z) { + z = sz - 1; + zoff = sxy * (z + sz * (w + sw * v)); + ozoff = osxy * ((osz - 1) + osz * (w + sw * v)); - if (odd_x) { - out[(osx - 1) + osx * oy + ozoff] = static_cast( - ( - static_cast(channel[(sx - 1) + sx * y + zoff]) - + static_cast(channel[(sx - 1) + sx * (y+1) + zoff]) - ) / 2 - ); + for (y = 0, oy = 0; y < sy - odd_y; y += 2, oy++) { + for (x = 0, ox = 0; x < sx - odd_x; x += 2, ox++) { + out[ox + osx * oy + ozoff] = static_cast( + ( + static_cast(channel[x + sx * y + zoff]) + + static_cast(channel[(x+1) + sx * y + zoff]) + + static_cast(channel[x + sx * (y+1) + zoff]) + + static_cast(channel[(x+1) + sx * (y+1) + zoff]) + ) / 4 + ); + } + + if (odd_x) { + out[(osx - 1) + osx * oy + ozoff] = static_cast( + ( + static_cast(channel[(sx - 1) + sx * y + zoff]) + + static_cast(channel[(sx - 1) + sx * (y+1) + zoff]) + ) / 2 + ); + } } - } - if (odd_y) { - y = sy - 1; - oy = osy - 1; + if (odd_y) { + y = sy - 1; + oy = osy - 1; - for (x = 0, ox = 0; x < sx - odd_x; x += 2, ox++) { - out[ox + osx * oy + ozoff] = static_cast( - ( - static_cast(channel[x + sx * y + zoff]) - + static_cast(channel[(x+1) + sx * y + zoff]) - ) / 2 - ); - } + for (x = 0, ox = 0; x < sx - odd_x; x += 2, ox++) { + out[ox + osx * oy + ozoff] = static_cast( + ( + static_cast(channel[x + sx * y + zoff]) + + static_cast(channel[(x+1) + sx * y + zoff]) + ) / 2 + ); + } - if (odd_x) { - out[(osx-1) + osx * oy + ozoff] = channel[(sx-1) + sx * (sy-1) + zoff]; // corner + if (odd_x) { + out[(osx-1) + osx * oy + ozoff] = channel[(sx-1) + sx * (sy-1) + zoff]; // corner + } } } } @@ -728,7 +738,8 @@ template U* denominator_2x2( T* channel, const size_t sx, const size_t sy, - const size_t sz, const size_t sw = 1 + const size_t sz, + const size_t sw = 1, const size_t sv = 1 ) { const size_t sxy = sx * sy; @@ -737,7 +748,7 @@ U* denominator_2x2( const size_t osy = (sy + 1) >> 1; const size_t osz = sz; const size_t osxy = osx * osy; - const size_t ovoxels = osxy * osz * sw; + const size_t ovoxels = osxy * osz * sw * sv; const bool odd_y = (sy & 0x01); @@ -746,46 +757,48 @@ U* denominator_2x2( size_t y, oy; size_t zoff, ozoff, oyoff; - for (size_t w = 0; w < sw; w++) { - for (size_t z = 0; z < sz; z++) { - zoff = sxy * (z + sz * w); - ozoff = osxy * (z + sz * w); - - for (y = 0, oy = 0; y < sy - (size_t)odd_y; y++, oy++) { - denominator_x_pass( - channel, denom, - sx, osx, - (sx * y + zoff), (osx * oy + ozoff) - ); - - y++; - - denominator_x_pass( - channel, denom, - sx, osx, - (sx * y + zoff), (osx * oy + ozoff) - ); + for (size_t v = 0; v < sv; v++) { + for (size_t w = 0; w < sw; w++) { + for (size_t z = 0; z < sz; z++) { + zoff = sxy * (z + sz * (w + sw * v)); + ozoff = osxy * (z + sz * (w + sw * v)); + + for (y = 0, oy = 0; y < sy - (size_t)odd_y; y++, oy++) { + denominator_x_pass( + channel, denom, + sx, osx, + (sx * y + zoff), (osx * oy + ozoff) + ); + + y++; + + denominator_x_pass( + channel, denom, + sx, osx, + (sx * y + zoff), (osx * oy + ozoff) + ); + } + + if (odd_y) { + y = sy - 1; + oy = osy - 1; + oyoff = (osx * oy + ozoff); + denominator_x_pass( + channel, denom, + sx, osx, + (sx * y + zoff), oyoff + ); + } } if (odd_y) { y = sy - 1; oy = osy - 1; oyoff = (osx * oy + ozoff); - denominator_x_pass( - channel, denom, - sx, osx, - (sx * y + zoff), oyoff - ); - } - } - - if (odd_y) { - y = sy - 1; - oy = osy - 1; - oyoff = (osx * oy + ozoff); - // double values to prevent darkening - for (size_t x = 0; x < osx; x++) { - denom[x + oyoff] *= 2; + // double values to prevent darkening + for (size_t x = 0; x < osx; x++) { + denom[x + oyoff] *= 2; + } } } } @@ -798,7 +811,8 @@ template U* denominator_2x2x2( T* channel, const size_t sx, const size_t sy, - const size_t sz, const size_t sw = 1 + const size_t sz, + const size_t sw = 1, const size_t sv = 1 ) { const size_t sxy = sx * sy; @@ -807,7 +821,7 @@ U* denominator_2x2x2( const size_t osy = (sy + 1) >> 1; const size_t osz = (sz + 1) >> 1; const size_t osxy = osx * osy; - const size_t ovoxels = osxy * osz * sw; + const size_t ovoxels = osxy * osz * sw * sv; const bool odd_y = (sy & 0x01); const bool odd_z = (sz & 0x01); @@ -818,12 +832,58 @@ U* denominator_2x2x2( size_t z, oz; size_t zoff, ozoff, oyoff; - for (size_t w = 0; w < sw; w++) { - for (z = 0, oz = 0; z < sz - (size_t)odd_z; oz++) { - ozoff = osxy * (oz + osz * w); - - for (size_t zz = 0; zz < 2 && z < sz - (size_t)odd_z; zz++, z++) { - zoff = sxy * (z + sz * w); + for (size_t v = 0; v < sv; v++) { + for (size_t w = 0; w < sw; w++) { + for (z = 0, oz = 0; z < sz - (size_t)odd_z; oz++) { + ozoff = osxy * (oz + osz * (w + sw * v)); + + for (size_t zz = 0; zz < 2 && z < sz - (size_t)odd_z; zz++, z++) { + zoff = sxy * (z + sz * (w + sw * v)); + + for (y = 0, oy = 0; y < sy - (size_t)odd_y; y++, oy++) { + denominator_x_pass( + channel, denom, + sx, osx, + (sx * y + zoff), (osx * oy + ozoff) + ); + + y++; + + denominator_x_pass( + channel, denom, + sx, osx, + (sx * y + zoff), (osx * oy + ozoff) + ); + } + + if (odd_y) { + y = sy - 1; + oy = osy - 1; + oyoff = (osx * oy + ozoff); + denominator_x_pass( + channel, denom, + sx, osx, + (sx * y + zoff), oyoff + ); + } + } + + if (odd_y) { + y = sy - 1; + oy = osy - 1; + oyoff = (osx * oy + ozoff); + // double values to prevent darkening + for (size_t x = 0; x < osx; x++) { + denom[x + oyoff] *= 2; + } + } + } + + if (odd_z) { + z = sz - 1; + oz = osz - 1; + ozoff = osxy * (oz + osz * (w + sw * v)); + zoff = sxy * (z + sz * (w + sw * v)); for (y = 0, oy = 0; y < sy - (size_t)odd_y; y++, oy++) { denominator_x_pass( @@ -850,62 +910,18 @@ U* denominator_2x2x2( sx, osx, (sx * y + zoff), oyoff ); - } - } - if (odd_y) { - y = sy - 1; - oy = osy - 1; - oyoff = (osx * oy + ozoff); - // double values to prevent darkening - for (size_t x = 0; x < osx; x++) { - denom[x + oyoff] *= 2; + // double values to prevent darkening + for (size_t x = 0; x < osx; x++) { + denom[x + oyoff] *= 2; + } } - } - } - - if (odd_z) { - z = sz - 1; - oz = osz - 1; - ozoff = osxy * (oz + osz * w); - zoff = sxy * (z + sz * w); - - for (y = 0, oy = 0; y < sy - (size_t)odd_y; y++, oy++) { - denominator_x_pass( - channel, denom, - sx, osx, - (sx * y + zoff), (osx * oy + ozoff) - ); - - y++; - - denominator_x_pass( - channel, denom, - sx, osx, - (sx * y + zoff), (osx * oy + ozoff) - ); - } - - if (odd_y) { - y = sy - 1; - oy = osy - 1; - oyoff = (osx * oy + ozoff); - denominator_x_pass( - channel, denom, - sx, osx, - (sx * y + zoff), oyoff - ); // double values to prevent darkening - for (size_t x = 0; x < osx; x++) { - denom[x + oyoff] *= 2; + for (size_t i = 0; i < osxy; i++) { + denom[i + ozoff] *= 2; } } - - // double values to prevent darkening - for (size_t i = 0; i < osxy; i++) { - denom[i + ozoff] *= 2; - } } } @@ -963,13 +979,15 @@ inline void _mode_pooling_2x2( T* img, T* oimg, const size_t sx, const size_t sy, const size_t sz, const size_t sw, + const size_t sv, const size_t stride_x, const size_t stride_y, - const size_t stride_z, const size_t stride_w + const size_t stride_z, const size_t stride_w, + const size_t stride_v ) { const size_t osx = (sx + 1) >> 1; const size_t osy = (sy + 1) >> 1; - size_t x, y, z, w; + size_t x, y, z, w, v; T a, b, c, d; size_t ox, oy, out; @@ -977,56 +995,58 @@ inline void _mode_pooling_2x2( const size_t xodd = (sx & 0x01); const size_t yodd = (sy & 0x01); - auto idx = [&](size_t x, size_t y, size_t z, size_t w) { - return x * stride_x + y * stride_y + z * stride_z + w * stride_w; + auto idx = [&](size_t x, size_t y, size_t z, size_t w, size_t v) { + return x * stride_x + y * stride_y + z * stride_z + w * stride_w + v * stride_v; }; - auto oidx = [&](size_t x, size_t y, size_t z, size_t w) { - return x + osx * (y + osy * (z + sz * w)); + auto oidx = [&](size_t x, size_t y, size_t z, size_t w, size_t v) { + return x + osx * (y + osy * (z + sz * (w + sw * v))); }; - for (w = 0; w < sw; w++) { - for (z = 0; z < sz; z++) { - oy = 0; - - for (y = 0; y < sy - yodd; y += 2) { - ox = 0; - - for (x = 0; x < sx - xodd; x += 2) { - a = img[idx(x,y,z,w)]; - b = img[idx(x+1,y,z,w)]; - c = img[idx(x,y+1,z,w)]; - d = img[idx(x+1,y+1,z,w)]; - - out = oidx(ox, oy, z, w); - - if (a == b) { - oimg[out] = a; + for (v = 0; v < sv; v++) { + for (w = 0; w < sw; w++) { + for (z = 0; z < sz; z++) { + oy = 0; + + for (y = 0; y < sy - yodd; y += 2) { + ox = 0; + + for (x = 0; x < sx - xodd; x += 2) { + a = img[idx(x,y,z,w,v)]; + b = img[idx(x+1,y,z,w,v)]; + c = img[idx(x,y+1,z,w,v)]; + d = img[idx(x+1,y+1,z,w,v)]; + + out = oidx(ox, oy, z, w, v); + + if (a == b) { + oimg[out] = a; + } + else if (a == c) { + oimg[out] = a; + } + else if (b == c) { + oimg[out] = b; + } + else { + oimg[out] = d; + } + + ox++; } - else if (a == c) { - oimg[out] = a; + if (xodd) { + out = oidx(osx - 1, oy, z, w, v); + oimg[out] = img[idx(sx - 1, y, z, w, v)]; } - else if (b == c) { - oimg[out] = b; + oy++; + } + if (yodd) { + for (x = 0; x < osx - xodd; x++) { + oimg[oidx(x, osy - 1, z, w, v)] = img[idx(2*x, sy - 1, z, w, v)]; } - else { - oimg[out] = d; + if (xodd) { + oimg[oidx(osx - 1, osy - 1, z, w, v)] = img[idx(sx - 1, sy - 1, z, w, v)]; } - - ox++; - } - if (xodd) { - out = oidx(osx - 1, oy, z, w); - oimg[out] = img[idx(sx - 1, y, z, w)]; - } - oy++; - } - if (yodd) { - for (x = 0; x < osx - xodd; x++) { - oimg[oidx(x, osy - 1, z, w)] = img[idx(2*x, sy - 1, z, w)]; - } - if (xodd) { - oimg[oidx(osx - 1, osy - 1, z, w)] = img[idx(sx - 1, sy - 1, z, w)]; } } } @@ -1040,7 +1060,7 @@ template inline void _mode_pooling_2x2x2( T* img, T* oimg, const size_t sx, const size_t sy, - const size_t sz, const size_t sw = 1, + const size_t sz, const size_t sw = 1, const size_t sv = 1, const bool sparse = false ) { @@ -1048,69 +1068,72 @@ inline void _mode_pooling_2x2x2( const size_t osx = (sx + 1) >> 1; const size_t osy = (sy + 1) >> 1; - const size_t osxy = osx * osy; T vals[8]; T cur_val, max_val; size_t max_ct, cur_ct; - for (size_t z = 0; z < sz; z += 2) { - for (size_t y = 0; y < sy; y += 2) { - for (size_t x = 0; x < sx; x += 2) { - size_t offset = x + sx * y + sxy * z; - - size_t plus_x = (x < sx - 1); - size_t plus_y = (y < sy - 1) * sx; - size_t plus_z = (z < sz - 1) * sxy; - - vals[0] = img[ offset ]; - vals[1] = img[ offset + plus_x ]; - vals[2] = img[ offset + plus_y ]; - vals[3] = img[ offset + plus_x + plus_y ]; - vals[4] = img[ offset + plus_z ]; - vals[5] = img[ offset + plus_x + plus_z ]; - vals[6] = img[ offset + plus_y + plus_z ]; - vals[7] = img[ offset + plus_x + plus_y + plus_z ]; - - size_t o_loc = (x >> 1) + osx * (y >> 1) + osxy * (z >> 1); - // These two if statements could be removed, but they add a very small - // cost on random data (< 10%) and can speed up connectomics data by ~4x - if (vals[0] == vals[1] && vals[0] == vals[2] && vals[0] == vals[3] && (!sparse || vals[0] != 0)) { - oimg[o_loc] = vals[0]; - continue; - } - else if (vals[4] == vals[5] && vals[4] == vals[6] && vals[4] == vals[7] && (!sparse || vals[4] != 0)) { - oimg[o_loc] = vals[4]; - continue; - } - - max_ct = 0; - max_val = 0; - for (short int t = 0; t < 8; t++) { - cur_val = vals[t]; - if (sparse && cur_val == 0) { - continue; - } - - cur_ct = 1; - for (short int p = 0; p < t; p++) { - cur_ct += (cur_val == vals[p]); - } - for (short int p = t + 1; p < 8; p++) { - cur_ct += (cur_val == vals[p]); - } - - if (cur_ct >= 4) { - max_val = cur_val; - break; - } - else if (cur_ct > max_ct) { - max_ct = cur_ct; - max_val = cur_val; + for (size_t v = 0; v < sv; v++) { + for (size_t w = 0; w < sw; w++) { + for (size_t z = 0; z < sz; z += 2) { + for (size_t y = 0; y < sy; y += 2) { + for (size_t x = 0; x < sx; x += 2) { + size_t offset = x + sx * (y + sy * (z + sz * (w + sw * v))); + + size_t plus_x = (x < sx - 1); + size_t plus_y = (y < sy - 1) * sx; + size_t plus_z = (z < sz - 1) * sxy; + + vals[0] = img[ offset ]; + vals[1] = img[ offset + plus_x ]; + vals[2] = img[ offset + plus_y ]; + vals[3] = img[ offset + plus_x + plus_y ]; + vals[4] = img[ offset + plus_z ]; + vals[5] = img[ offset + plus_x + plus_z ]; + vals[6] = img[ offset + plus_y + plus_z ]; + vals[7] = img[ offset + plus_x + plus_y + plus_z ]; + + size_t o_loc = (x >> 1) + osx * ((y >> 1) + osy * ((z >> 1) + sz * (w + sw * v))); + // These two if statements could be removed, but they add a very small + // cost on random data (< 10%) and can speed up connectomics data by ~4x + if (vals[0] == vals[1] && vals[0] == vals[2] && vals[0] == vals[3] && (!sparse || vals[0] != 0)) { + oimg[o_loc] = vals[0]; + continue; + } + else if (vals[4] == vals[5] && vals[4] == vals[6] && vals[4] == vals[7] && (!sparse || vals[4] != 0)) { + oimg[o_loc] = vals[4]; + continue; + } + + max_ct = 0; + max_val = 0; + for (short int t = 0; t < 8; t++) { + cur_val = vals[t]; + if (sparse && cur_val == 0) { + continue; + } + + cur_ct = 1; + for (short int p = 0; p < t; p++) { + cur_ct += (cur_val == vals[p]); + } + for (short int p = t + 1; p < 8; p++) { + cur_ct += (cur_val == vals[p]); + } + + if (cur_ct >= 4) { + max_val = cur_val; + break; + } + else if (cur_ct > max_ct) { + max_ct = cur_ct; + max_val = cur_val; + } + } + + oimg[o_loc] = max_val; } } - - oimg[o_loc] = max_val; } } } diff --git a/tinybrain/accelerated.pyx b/tinybrain/accelerated.pyx index 0b52898..9c1a9b8 100644 --- a/tinybrain/accelerated.pyx +++ b/tinybrain/accelerated.pyx @@ -22,13 +22,13 @@ import numpy as np np.import_array() cdef extern from "accelerated.hpp" namespace "accelerated": - cdef T* _average_pooling_2x2_single_mip[T](T* arr, size_t sx, size_t sy, size_t sz, size_t sw, T* out, uint8_t sparse) - cdef U* accumulate_2x2[T, U](T* arr, size_t sx, size_t sy, size_t sz, size_t sw) - cdef T* accumulate_2x2f[T](T* arr, size_t sx, size_t sy, size_t sz, size_t sw) - cdef U* denominator_2x2[T, U](T* arr, size_t sx, size_t sy, size_t sz, size_t sw) - cdef T* _average_pooling_2x2x2_single_mip[T](T* arr, size_t sx, size_t sy, size_t sz, size_t sw, T* out) - cdef U* accumulate_2x2x2[T, U](T* arr, size_t sx, size_t sy, size_t sz, size_t sw) - cdef U* denominator_2x2x2[T, U](T* arr, size_t sx, size_t sy, size_t sz, size_t sw) + cdef T* _average_pooling_2x2_single_mip[T](T* arr, size_t sx, size_t sy, size_t sz, size_t sw, size_t sv, T* out, uint8_t sparse) + cdef U* accumulate_2x2[T, U](T* arr, size_t sx, size_t sy, size_t sz, size_t sw, size_t sv) + cdef T* accumulate_2x2f[T](T* arr, size_t sx, size_t sy, size_t sz, size_t sw, size_t sv) + cdef U* denominator_2x2[T, U](T* arr, size_t sx, size_t sy, size_t sz, size_t sw, size_t sv) + cdef T* _average_pooling_2x2x2_single_mip[T](T* arr, size_t sx, size_t sy, size_t sz, size_t sw, size_t sv, T* out) + cdef U* accumulate_2x2x2[T, U](T* arr, size_t sx, size_t sy, size_t sz, size_t sw, size_t sv) + cdef U* denominator_2x2x2[T, U](T* arr, size_t sx, size_t sy, size_t sz, size_t sw, size_t sv) cdef void render_image[T, U](T* arr, U* oimg, uint32_t bitshift, size_t ovoxels) cdef void render_image_sparse[T, U](T* numerator, T* denominator, U* oimg, size_t ovoxels) cdef void render_image_floating[T](T* arr, T* oimg, T divisor, size_t ovoxels) @@ -37,14 +37,15 @@ cdef extern from "accelerated.hpp" namespace "accelerated": cdef void _mode_pooling_2x2[T]( T* img, T* oimg, size_t sx, size_t sy, - size_t sz, size_t sw, + size_t sz, size_t sw, size_t sv, size_t stride_x, size_t stride_y, - size_t stride_z, size_t stride_w + size_t stride_z, size_t stride_w, + size_t stride_v ) cdef void _mode_pooling_2x2x2[T]( T* img, T* oimg, size_t sx, size_t sy, - size_t sz, size_t sw, + size_t sz, size_t sw, size_t sv, uint8_t sparse ) @@ -80,7 +81,7 @@ ctypedef fused NUMBER: def average_pooling_2x2(channel, size_t num_mips=1, sparse=False): ndim = channel.ndim - channel = expand_dims(channel, 4) + channel = expand_dims(channel, 5) cdef size_t sx = channel.shape[0] cdef size_t sy = channel.shape[1] @@ -107,80 +108,82 @@ def average_pooling_2x2(channel, size_t num_mips=1, sparse=False): return results -def _average_pooling_2x2_single_mip_py(np.ndarray[NUMBER, ndim=4] channel, sparse): - cdef uint8_t[:,:,:,:] arr_memview8u - cdef uint16_t[:,:,:,:] arr_memview16u - cdef uint32_t[:,:,:,:] arr_memview32u - cdef uint64_t[:,:,:,:] arr_memview64u - cdef float[:,:,:,:] arr_memviewf - cdef double[:,:,:,:] arr_memviewd - - cdef uint8_t[:,:,:,:] out_memview8u - cdef uint16_t[:,:,:,:] out_memview16u - cdef uint32_t[:,:,:,:] out_memview32u - cdef uint64_t[:,:,:,:] out_memview64u - cdef float[:,:,:,:] out_memviewf - cdef double[:,:,:,:] out_memviewd +def _average_pooling_2x2_single_mip_py(np.ndarray[NUMBER, ndim=5] channel, sparse): + cdef uint8_t[:,:,:,:,:] arr_memview8u + cdef uint16_t[:,:,:,:,:] arr_memview16u + cdef uint32_t[:,:,:,:,:] arr_memview32u + cdef uint64_t[:,:,:,:,:] arr_memview64u + cdef float[:,:,:,:,:] arr_memviewf + cdef double[:,:,:,:,:] arr_memviewd + + cdef uint8_t[:,:,:,:,:] out_memview8u + cdef uint16_t[:,:,:,:,:] out_memview16u + cdef uint32_t[:,:,:,:,:] out_memview32u + cdef uint64_t[:,:,:,:,:] out_memview64u + cdef float[:,:,:,:,:] out_memviewf + cdef double[:,:,:,:,:] out_memviewd cdef size_t sx = channel.shape[0] cdef size_t sy = channel.shape[1] cdef size_t sz = channel.shape[2] cdef size_t sw = channel.shape[3] + cdef size_t sv = channel.shape[4] - shape = ( (sx+1)//2, (sy+1)//2, sz, sw ) + shape = ( (sx+1)//2, (sy+1)//2, sz, sw, sv ) - cdef np.ndarray[NUMBER, ndim=4] out = np.zeros(shape, dtype=channel.dtype, order="F") + cdef np.ndarray[NUMBER, ndim=5] out = np.zeros(shape, dtype=channel.dtype, order="F") if channel.dtype == np.uint8: arr_memview8u = channel out_memview8u = out - _average_pooling_2x2_single_mip[uint8_t](&arr_memview8u[0,0,0,0], sx, sy, sz, sw, &out_memview8u[0,0,0,0], bool(sparse)) + _average_pooling_2x2_single_mip[uint8_t](&arr_memview8u[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview8u[0,0,0,0,0], bool(sparse)) elif channel.dtype == np.uint16: arr_memview16u = channel out_memview16u = out - _average_pooling_2x2_single_mip[uint16_t](&arr_memview16u[0,0,0,0], sx, sy, sz, sw, &out_memview16u[0,0,0,0], bool(sparse)) + _average_pooling_2x2_single_mip[uint16_t](&arr_memview16u[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview16u[0,0,0,0,0], bool(sparse)) elif channel.dtype == np.uint32: arr_memview32u = channel out_memview32u = out - _average_pooling_2x2_single_mip[uint32_t](&arr_memview32u[0,0,0,0], sx, sy, sz, sw, &out_memview32u[0,0,0,0], bool(sparse)) + _average_pooling_2x2_single_mip[uint32_t](&arr_memview32u[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview32u[0,0,0,0,0], bool(sparse)) elif channel.dtype == np.uint64: arr_memview64u = channel out_memview64u = out - _average_pooling_2x2_single_mip[uint64_t](&arr_memview64u[0,0,0,0], sx, sy, sz, sw, &out_memview64u[0,0,0,0], bool(sparse)) + _average_pooling_2x2_single_mip[uint64_t](&arr_memview64u[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview64u[0,0,0,0,0], bool(sparse)) elif channel.dtype == np.float32: arr_memviewf = channel out_memviewf = out - _average_pooling_2x2_single_mip[float](&arr_memviewf[0,0,0,0], sx, sy, sz, sw, &out_memviewf[0,0,0,0], bool(sparse)) + _average_pooling_2x2_single_mip[float](&arr_memviewf[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memviewf[0,0,0,0,0], bool(sparse)) elif channel.dtype == np.float64: arr_memviewd = channel out_memviewd = out - _average_pooling_2x2_single_mip[double](&arr_memviewd[0,0,0,0], sx, sy, sz, sw, &out_memviewd[0,0,0,0], bool(sparse)) + _average_pooling_2x2_single_mip[double](&arr_memviewd[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memviewd[0,0,0,0,0], bool(sparse)) else: raise TypeError("Unsupported data type. " + str(channel.dtype)) return [ out ] -def _average_pooling_2x2_uint8(np.ndarray[uint8_t, ndim=4] channel, uint32_t num_mips, sparse): +def _average_pooling_2x2_uint8(np.ndarray[uint8_t, ndim=5] channel, uint32_t num_mips, sparse): cdef size_t sx = channel.shape[0] cdef size_t sy = channel.shape[1] cdef size_t sz = channel.shape[2] cdef size_t sw = channel.shape[3] + cdef size_t sv = channel.shape[4] cdef size_t sxy = sx * sy cdef size_t osx = (sx + 1) // 2 cdef size_t osy = (sy + 1) // 2 cdef size_t osxy = osx * osy - cdef size_t ovoxels = osxy * sz * sw + cdef size_t ovoxels = osxy * sz * sw * sv - cdef uint8_t[:,:,:,:] channelview = channel - cdef uint16_t* accum = accumulate_2x2[uint8_t, uint16_t](&channelview[0,0,0,0], sx, sy, sz, sw) + cdef uint8_t[:,:,:,:,:] channelview = channel + cdef uint16_t* accum = accumulate_2x2[uint8_t, uint16_t](&channelview[0,0,0,0,0], sx, sy, sz, sw, sv) cdef uint16_t[:] accumview = accum cdef uint16_t* tmp cdef uint32_t mip, bitshift cdef uint16_t* denominator if sparse: - denominator = denominator_2x2[uint8_t, uint16_t](&channelview[0,0,0,0], sx, sy, sz, sw) + denominator = denominator_2x2[uint8_t, uint16_t](&channelview[0,0,0,0,0], sx, sy, sz, sw, sv) cdef uint8_t[:] oimgview @@ -196,7 +199,7 @@ def _average_pooling_2x2_uint8(np.ndarray[uint8_t, ndim=4] channel, uint32_t num render_image[uint16_t, uint8_t](&accumview[0], &oimgview[0], bitshift, ovoxels) results.append( - oimg.reshape( (osx, osy, sz, sw), order='F' ) + oimg.reshape( (osx, osy, sz, sw, sv), order='F' ) ) if mip == num_mips - 1: @@ -211,43 +214,44 @@ def _average_pooling_2x2_uint8(np.ndarray[uint8_t, ndim=4] channel, uint32_t num osx = (sx + 1) // 2 osy = (sy + 1) // 2 osxy = osx * osy - ovoxels = osxy * sz * sw + ovoxels = osxy * sz * sw * sv tmp = accum - accum = accumulate_2x2[uint16_t, uint16_t](accum, sx, sy, sz, sw) + accum = accumulate_2x2[uint16_t, uint16_t](accum, sx, sy, sz, sw, sv) accumview = accum PyMem_Free(tmp) if sparse: tmp = denominator - denominator = accumulate_2x2[uint16_t, uint16_t](denominator, sx, sy, sz, sw) + denominator = accumulate_2x2[uint16_t, uint16_t](denominator, sx, sy, sz, sw, sv) PyMem_Free(tmp) PyMem_Free(accum) return results -def _average_pooling_2x2_uint16(np.ndarray[uint16_t, ndim=4] channel, uint32_t num_mips, sparse): +def _average_pooling_2x2_uint16(np.ndarray[uint16_t, ndim=5] channel, uint32_t num_mips, sparse): cdef size_t sx = channel.shape[0] cdef size_t sy = channel.shape[1] cdef size_t sz = channel.shape[2] cdef size_t sw = channel.shape[3] + cdef size_t sv = channel.shape[4] cdef size_t sxy = sx * sy cdef size_t osx = (sx + 1) // 2 cdef size_t osy = (sy + 1) // 2 cdef size_t osxy = osx * osy - cdef size_t ovoxels = osxy * sz * sw + cdef size_t ovoxels = osxy * sz * sw * sv - cdef uint16_t[:,:,:,:] channelview = channel - cdef uint32_t* accum = accumulate_2x2[uint16_t, uint32_t](&channelview[0,0,0,0], sx, sy, sz, sw) + cdef uint16_t[:,:,:,:,:] channelview = channel + cdef uint32_t* accum = accumulate_2x2[uint16_t, uint32_t](&channelview[0,0,0,0,0], sx, sy, sz, sw, sv) cdef uint32_t[:] accumview = accum cdef uint32_t* tmp cdef uint32_t mip, bitshift cdef uint32_t* denominator if sparse: - denominator = denominator_2x2[uint16_t, uint32_t](&channelview[0,0,0,0], sx, sy, sz, sw) + denominator = denominator_2x2[uint16_t, uint32_t](&channelview[0,0,0,0,0], sx, sy, sz, sw, sv) cdef uint16_t[:] oimgview @@ -263,7 +267,7 @@ def _average_pooling_2x2_uint16(np.ndarray[uint16_t, ndim=4] channel, uint32_t n render_image[uint32_t, uint16_t](&accumview[0], &oimgview[0], bitshift, ovoxels) results.append( - oimg.reshape( (osx, osy, sz, sw), order='F' ) + oimg.reshape( (osx, osy, sz, sw, sv), order='F' ) ) if mip == num_mips - 1: @@ -278,43 +282,44 @@ def _average_pooling_2x2_uint16(np.ndarray[uint16_t, ndim=4] channel, uint32_t n osx = (sx + 1) // 2 osy = (sy + 1) // 2 osxy = osx * osy - ovoxels = osxy * sz * sw + ovoxels = osxy * sz * sw * sv tmp = accum - accum = accumulate_2x2[uint32_t, uint32_t](accum, sx, sy, sz, sw) + accum = accumulate_2x2[uint32_t, uint32_t](accum, sx, sy, sz, sw, sv) accumview = accum PyMem_Free(tmp) if sparse: tmp = denominator - denominator = accumulate_2x2[uint32_t, uint32_t](denominator, sx, sy, sz, sw) + denominator = accumulate_2x2[uint32_t, uint32_t](denominator, sx, sy, sz, sw, sv) PyMem_Free(tmp) PyMem_Free(accum) return results -def _average_pooling_2x2_float(np.ndarray[float, ndim=4] channel, uint32_t num_mips, sparse): +def _average_pooling_2x2_float(np.ndarray[float, ndim=5] channel, uint32_t num_mips, sparse): cdef size_t sx = channel.shape[0] cdef size_t sy = channel.shape[1] cdef size_t sz = channel.shape[2] cdef size_t sw = channel.shape[3] + cdef size_t sv = channel.shape[4] cdef size_t sxy = sx * sy cdef size_t osx = (sx + 1) // 2 cdef size_t osy = (sy + 1) // 2 cdef size_t osxy = osx * osy - cdef size_t ovoxels = osxy * sz * sw + cdef size_t ovoxels = osxy * sz * sw * sv - cdef float[:,:,:,:] channelview = channel - cdef float* accum = accumulate_2x2f[float](&channelview[0,0,0,0], sx, sy, sz, sw) + cdef float[:,:,:,:,:] channelview = channel + cdef float* accum = accumulate_2x2f[float](&channelview[0,0,0,0,0], sx, sy, sz, sw, sv) cdef float[:] accumview = accum cdef float* tmp cdef uint32_t mip cdef float* denominator if sparse: - denominator = denominator_2x2[float, float](&channelview[0,0,0,0], sx, sy, sz, sw) + denominator = denominator_2x2[float, float](&channelview[0,0,0,0,0], sx, sy, sz, sw, sv) cdef float divisor = 1.0 cdef float[:] oimgview @@ -331,7 +336,7 @@ def _average_pooling_2x2_float(np.ndarray[float, ndim=4] channel, uint32_t num_m render_image_floating[float](&accumview[0], &oimgview[0], divisor, ovoxels) results.append( - oimg.reshape( (osx, osy, sz, sw), order='F' ) + oimg.reshape( (osx, osy, sz, sw, sv), order='F' ) ) if mip == num_mips - 1: @@ -343,36 +348,37 @@ def _average_pooling_2x2_float(np.ndarray[float, ndim=4] channel, uint32_t num_m osx = (sx + 1) // 2 osy = (sy + 1) // 2 osxy = osx * osy - ovoxels = osxy * sz * sw + ovoxels = osxy * sz * sw * sv tmp = accum - accum = accumulate_2x2f[float](accum, sx, sy, sz, sw) + accum = accumulate_2x2f[float](accum, sx, sy, sz, sw, sv) accumview = accum PyMem_Free(tmp) if sparse: tmp = denominator - denominator = accumulate_2x2f[float](denominator, sx, sy, sz, sw) + denominator = accumulate_2x2f[float](denominator, sx, sy, sz, sw, sv) PyMem_Free(tmp) PyMem_Free(accum) return results -def _average_pooling_2x2_double(np.ndarray[double, ndim=4] channel, uint32_t num_mips, sparse): +def _average_pooling_2x2_double(np.ndarray[double, ndim=5] channel, uint32_t num_mips, sparse): cdef size_t sx = channel.shape[0] cdef size_t sy = channel.shape[1] cdef size_t sz = channel.shape[2] cdef size_t sw = channel.shape[3] + cdef size_t sv = channel.shape[4] cdef size_t sxy = sx * sy cdef size_t osx = (sx + 1) // 2 cdef size_t osy = (sy + 1) // 2 cdef size_t osxy = osx * osy - cdef size_t ovoxels = osxy * sz * sw + cdef size_t ovoxels = osxy * sz * sw * sv - cdef double[:,:,:,:] channelview = channel - cdef double* accum = accumulate_2x2f[double](&channelview[0,0,0,0], sx, sy, sz, sw) + cdef double[:,:,:,:,:] channelview = channel + cdef double* accum = accumulate_2x2f[double](&channelview[0,0,0,0,0], sx, sy, sz, sw, sv) cdef double[:] accumview = accum cdef double* tmp cdef uint32_t mip @@ -382,7 +388,7 @@ def _average_pooling_2x2_double(np.ndarray[double, ndim=4] channel, uint32_t num cdef double* denominator if sparse: - denominator = denominator_2x2[double, double](&channelview[0,0,0,0], sx, sy, sz, sw) + denominator = denominator_2x2[double, double](&channelview[0,0,0,0,0], sx, sy, sz, sw, sv) results = [] for mip in range(num_mips): @@ -396,7 +402,7 @@ def _average_pooling_2x2_double(np.ndarray[double, ndim=4] channel, uint32_t num render_image_floating[double](&accumview[0], &oimgview[0], divisor, ovoxels) results.append( - oimg.reshape( (osx, osy, sz, sw), order='F' ) + oimg.reshape( (osx, osy, sz, sw, sv), order='F' ) ) if mip == num_mips - 1: @@ -408,16 +414,16 @@ def _average_pooling_2x2_double(np.ndarray[double, ndim=4] channel, uint32_t num osx = (sx + 1) // 2 osy = (sy + 1) // 2 osxy = osx * osy - ovoxels = osxy * sz * sw + ovoxels = osxy * sz * sw * sv tmp = accum - accum = accumulate_2x2f[double](accum, sx, sy, sz, sw) + accum = accumulate_2x2f[double](accum, sx, sy, sz, sw, sv) accumview = accum PyMem_Free(tmp) if sparse: tmp = denominator - denominator = accumulate_2x2f[double](denominator, sx, sy, sz, sw) + denominator = accumulate_2x2f[double](denominator, sx, sy, sz, sw, sv) PyMem_Free(tmp) PyMem_Free(accum) @@ -428,7 +434,7 @@ def _average_pooling_2x2_double(np.ndarray[double, ndim=4] channel, uint32_t num def average_pooling_2x2x2(channel, size_t num_mips=1, sparse=False): ndim = channel.ndim - channel = expand_dims(channel, 4) + channel = expand_dims(channel, 5) cdef size_t sx = channel.shape[0] cdef size_t sy = channel.shape[1] @@ -459,75 +465,77 @@ def average_pooling_2x2x2(channel, size_t num_mips=1, sparse=False): return results -def _average_pooling_2x2x2_single_mip_py(np.ndarray[NUMBER, ndim=4] channel): - cdef uint8_t[:,:,:,:] arr_memview8u - cdef uint16_t[:,:,:,:] arr_memview16u - cdef uint32_t[:,:,:,:] arr_memview32u - cdef uint64_t[:,:,:,:] arr_memview64u - cdef float[:,:,:,:] arr_memviewf - cdef double[:,:,:,:] arr_memviewd - - cdef uint8_t[:,:,:,:] out_memview8u - cdef uint16_t[:,:,:,:] out_memview16u - cdef uint32_t[:,:,:,:] out_memview32u - cdef uint64_t[:,:,:,:] out_memview64u - cdef float[:,:,:,:] out_memviewf - cdef double[:,:,:,:] out_memviewd +def _average_pooling_2x2x2_single_mip_py(np.ndarray[NUMBER, ndim=5] channel): + cdef uint8_t[:,:,:,:,:] arr_memview8u + cdef uint16_t[:,:,:,:,:] arr_memview16u + cdef uint32_t[:,:,:,:,:] arr_memview32u + cdef uint64_t[:,:,:,:,:] arr_memview64u + cdef float[:,:,:,:,:] arr_memviewf + cdef double[:,:,:,:,:] arr_memviewd + + cdef uint8_t[:,:,:,:,:] out_memview8u + cdef uint16_t[:,:,:,:,:] out_memview16u + cdef uint32_t[:,:,:,:,:] out_memview32u + cdef uint64_t[:,:,:,:,:] out_memview64u + cdef float[:,:,:,:,:] out_memviewf + cdef double[:,:,:,:,:] out_memviewd cdef size_t sx = channel.shape[0] cdef size_t sy = channel.shape[1] cdef size_t sz = channel.shape[2] cdef size_t sw = channel.shape[3] + cdef size_t sv = channel.shape[4] - shape = ( (sx+1)//2, (sy+1)//2, (sz+1)//2, sw ) + shape = ( (sx+1)//2, (sy+1)//2, (sz+1)//2, sw, sv ) - cdef np.ndarray[NUMBER, ndim=4] out = np.zeros(shape, dtype=channel.dtype, order="F") + cdef np.ndarray[NUMBER, ndim=5] out = np.zeros(shape, dtype=channel.dtype, order="F") if channel.dtype == np.uint8: arr_memview8u = channel out_memview8u = out - _average_pooling_2x2x2_single_mip[uint8_t](&arr_memview8u[0,0,0,0], sx, sy, sz, sw, &out_memview8u[0,0,0,0]) + _average_pooling_2x2x2_single_mip[uint8_t](&arr_memview8u[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview8u[0,0,0,0,0]) elif channel.dtype == np.uint16: arr_memview16u = channel out_memview16u = out - _average_pooling_2x2x2_single_mip[uint16_t](&arr_memview16u[0,0,0,0], sx, sy, sz, sw, &out_memview16u[0,0,0,0]) + _average_pooling_2x2x2_single_mip[uint16_t](&arr_memview16u[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview16u[0,0,0,0,0]) elif channel.dtype == np.uint32: arr_memview32u = channel out_memview32u = out - _average_pooling_2x2x2_single_mip[uint32_t](&arr_memview32u[0,0,0,0], sx, sy, sz, sw, &out_memview32u[0,0,0,0]) + _average_pooling_2x2x2_single_mip[uint32_t](&arr_memview32u[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview32u[0,0,0,0,0]) elif channel.dtype == np.uint64: arr_memview64u = channel out_memview64u = out - _average_pooling_2x2x2_single_mip[uint64_t](&arr_memview64u[0,0,0,0], sx, sy, sz, sw, &out_memview64u[0,0,0,0]) + _average_pooling_2x2x2_single_mip[uint64_t](&arr_memview64u[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memview64u[0,0,0,0,0]) elif channel.dtype == np.float32: arr_memviewf = channel out_memviewf = out - _average_pooling_2x2x2_single_mip[float](&arr_memviewf[0,0,0,0], sx, sy, sz, sw, &out_memviewf[0,0,0,0]) + _average_pooling_2x2x2_single_mip[float](&arr_memviewf[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memviewf[0,0,0,0,0]) elif channel.dtype == np.float64: arr_memviewd = channel out_memviewd = out - _average_pooling_2x2x2_single_mip[double](&arr_memviewd[0,0,0,0], sx, sy, sz, sw, &out_memviewd[0,0,0,0]) + _average_pooling_2x2x2_single_mip[double](&arr_memviewd[0,0,0,0,0], sx, sy, sz, sw, sv, &out_memviewd[0,0,0,0,0]) else: raise TypeError("Unsupported data type. " + str(channel.dtype)) return [ out ] -def _average_pooling_2x2x2_uint8(np.ndarray[uint8_t, ndim=4] channel, uint32_t num_mips, sparse=False): +def _average_pooling_2x2x2_uint8(np.ndarray[uint8_t, ndim=5] channel, uint32_t num_mips, sparse=False): cdef size_t sx = channel.shape[0] cdef size_t sy = channel.shape[1] cdef size_t sz = channel.shape[2] cdef size_t sw = channel.shape[3] + cdef size_t sv = channel.shape[4] cdef size_t sxy = sx * sy cdef size_t osx = (sx + 1) // 2 cdef size_t osy = (sy + 1) // 2 cdef size_t osz = (sz + 1) // 2 cdef size_t osxy = osx * osy - cdef size_t ovoxels = osxy * osz * sw + cdef size_t ovoxels = osxy * osz * sw * sv - cdef uint8_t[:,:,:,:] channelview = channel + cdef uint8_t[:,:,:,:,:] channelview = channel cdef uint32_t* accum = accumulate_2x2x2[uint8_t, uint32_t]( - &channelview[0,0,0,0], sx, sy, sz, sw + &channelview[0,0,0,0,0], sx, sy, sz, sw, sv ) cdef uint32_t[:] accumview = accum @@ -536,7 +544,7 @@ def _average_pooling_2x2x2_uint8(np.ndarray[uint8_t, ndim=4] channel, uint32_t n cdef uint32_t[:] denomview if sparse: denom = denominator_2x2x2[uint8_t, uint32_t]( - &channelview[0,0,0,0], sx, sy, sz, sw + &channelview[0,0,0,0,0], sx, sy, sz, sw, sv ) denomview = denom @@ -576,16 +584,16 @@ def _average_pooling_2x2x2_uint8(np.ndarray[uint8_t, ndim=4] channel, uint32_t n osy = (sy + 1) // 2 osz = (sz + 1) // 2 osxy = osx * osy - ovoxels = osxy * osz * sw + ovoxels = osxy * osz * sw * sv tmp = accum - accum = accumulate_2x2x2[uint32_t, uint32_t](accum, sx, sy, sz, sw) + accum = accumulate_2x2x2[uint32_t, uint32_t](accum, sx, sy, sz, sw, sv) accumview = accum PyMem_Free(tmp) if sparse: tmp = denom - denom = accumulate_2x2x2[uint32_t, uint32_t](denom, sx, sy, sz, sw) + denom = accumulate_2x2x2[uint32_t, uint32_t](denom, sx, sy, sz, sw, sv) denomview = denom PyMem_Free(tmp) @@ -595,22 +603,23 @@ def _average_pooling_2x2x2_uint8(np.ndarray[uint8_t, ndim=4] channel, uint32_t n return results -def _average_pooling_2x2x2_uint16(np.ndarray[uint16_t, ndim=4] channel, uint32_t num_mips, sparse): +def _average_pooling_2x2x2_uint16(np.ndarray[uint16_t, ndim=5] channel, uint32_t num_mips, sparse): cdef size_t sx = channel.shape[0] cdef size_t sy = channel.shape[1] cdef size_t sz = channel.shape[2] cdef size_t sw = channel.shape[3] + cdef size_t sv = channel.shape[4] cdef size_t sxy = sx * sy cdef size_t osx = (sx + 1) // 2 cdef size_t osy = (sy + 1) // 2 cdef size_t osz = (sz + 1) // 2 cdef size_t osxy = osx * osy - cdef size_t ovoxels = osxy * osz * sw + cdef size_t ovoxels = osxy * osz * sw * sv - cdef uint16_t[:,:,:,:] channelview = channel + cdef uint16_t[:,:,:,:,:] channelview = channel cdef uint32_t* accum = accumulate_2x2x2[uint16_t, uint32_t]( - &channelview[0,0,0,0], sx, sy, sz, sw + &channelview[0,0,0,0,0], sx, sy, sz, sw, sv ) cdef uint32_t[:] accumview = accum @@ -619,7 +628,7 @@ def _average_pooling_2x2x2_uint16(np.ndarray[uint16_t, ndim=4] channel, uint32_t cdef uint32_t[:] denomview if sparse: denom = denominator_2x2x2[uint16_t, uint32_t]( - &channelview[0,0,0,0], sx, sy, sz, sw + &channelview[0,0,0,0,0], sx, sy, sz, sw, sv ) denomview = denom @@ -639,7 +648,7 @@ def _average_pooling_2x2x2_uint16(np.ndarray[uint16_t, ndim=4] channel, uint32_t render_image[uint32_t, uint16_t](&accumview[0], &oimgview[0], bitshift, ovoxels) results.append( - oimg.reshape( (osx, osy, osz, sw), order='F' ) + oimg.reshape( (osx, osy, osz, sw, sv), order='F' ) ) if mip == num_mips - 1: @@ -658,16 +667,16 @@ def _average_pooling_2x2x2_uint16(np.ndarray[uint16_t, ndim=4] channel, uint32_t osy = (sy + 1) // 2 osz = (sz + 1) // 2 osxy = osx * osy - ovoxels = osxy * osz * sw + ovoxels = osxy * osz * sw * sv tmp = accum - accum = accumulate_2x2x2[uint32_t, uint32_t](accum, sx, sy, sz, sw) + accum = accumulate_2x2x2[uint32_t, uint32_t](accum, sx, sy, sz, sw, sv) accumview = accum PyMem_Free(tmp) if sparse: tmp = denom - denom = accumulate_2x2x2[uint32_t, uint32_t](denom, sx, sy, sz, sw) + denom = accumulate_2x2x2[uint32_t, uint32_t](denom, sx, sy, sz, sw, sv) denomview = denom PyMem_Free(tmp) @@ -677,21 +686,22 @@ def _average_pooling_2x2x2_uint16(np.ndarray[uint16_t, ndim=4] channel, uint32_t return results -def _average_pooling_2x2x2_float(np.ndarray[float, ndim=4] channel, uint32_t num_mips): +def _average_pooling_2x2x2_float(np.ndarray[float, ndim=5] channel, uint32_t num_mips): cdef size_t sx = channel.shape[0] cdef size_t sy = channel.shape[1] cdef size_t sz = channel.shape[2] cdef size_t sw = channel.shape[3] + cdef size_t sv = channel.shape[4] cdef size_t sxy = sx * sy cdef size_t osx = (sx + 1) // 2 cdef size_t osy = (sy + 1) // 2 cdef size_t osz = (sz + 1) // 2 cdef size_t osxy = osx * osy - cdef size_t ovoxels = osxy * osz * sw + cdef size_t ovoxels = osxy * osz * sw * sv - cdef float[:,:,:,:] channelview = channel - cdef float* accum = accumulate_2x2x2[float,float](&channelview[0,0,0,0], sx, sy, sz, sw) + cdef float[:,:,:,:,:] channelview = channel + cdef float* accum = accumulate_2x2x2[float,float](&channelview[0,0,0,0,0], sx, sy, sz, sw, sv) cdef float[:] accumview = accum cdef float* tmp cdef uint32_t mip @@ -707,7 +717,7 @@ def _average_pooling_2x2x2_float(np.ndarray[float, ndim=4] channel, uint32_t num render_image_floating[float](&accumview[0], &oimgview[0], divisor, ovoxels) results.append( - oimg.reshape( (osx, osy, osz, sw), order='F' ) + oimg.reshape( (osx, osy, osz, sw, sv), order='F' ) ) if mip == num_mips - 1: @@ -721,10 +731,10 @@ def _average_pooling_2x2x2_float(np.ndarray[float, ndim=4] channel, uint32_t num osy = (sy + 1) // 2 osz = (sz + 1) // 2 osxy = osx * osy - ovoxels = osxy * osz * sw + ovoxels = osxy * osz * sw * sv tmp = accum - accum = accumulate_2x2x2[float,float](accum, sx, sy, sz, sw) + accum = accumulate_2x2x2[float,float](accum, sx, sy, sz, sw, sv) accumview = accum PyMem_Free(tmp) @@ -732,21 +742,22 @@ def _average_pooling_2x2x2_float(np.ndarray[float, ndim=4] channel, uint32_t num return results -def _average_pooling_2x2x2_double(np.ndarray[double, ndim=4] channel, uint32_t num_mips): +def _average_pooling_2x2x2_double(np.ndarray[double, ndim=5] channel, uint32_t num_mips): cdef size_t sx = channel.shape[0] cdef size_t sy = channel.shape[1] cdef size_t sz = channel.shape[2] cdef size_t sw = channel.shape[3] + cdef size_t sv = channel.shape[4] cdef size_t sxy = sx * sy cdef size_t osx = (sx + 1) // 2 cdef size_t osy = (sy + 1) // 2 cdef size_t osz = (sz + 1) // 2 cdef size_t osxy = osx * osy - cdef size_t ovoxels = osxy * osz * sw + cdef size_t ovoxels = osxy * osz * sw * sv - cdef double[:,:,:,:] channelview = channel - cdef double* accum = accumulate_2x2x2[double,double](&channelview[0,0,0,0], sx, sy, sz, sw) + cdef double[:,:,:,:,:] channelview = channel + cdef double* accum = accumulate_2x2x2[double,double](&channelview[0,0,0,0,0], sx, sy, sz, sw, sv) cdef double[:] accumview = accum cdef double* tmp cdef uint32_t mip @@ -762,7 +773,7 @@ def _average_pooling_2x2x2_double(np.ndarray[double, ndim=4] channel, uint32_t n render_image_floating[double](&accumview[0], &oimgview[0], divisor, ovoxels) results.append( - oimg.reshape( (osx, osy, osz, sw), order='F' ) + oimg.reshape( (osx, osy, osz, sw, sv), order='F' ) ) if mip == num_mips - 1: @@ -776,10 +787,10 @@ def _average_pooling_2x2x2_double(np.ndarray[double, ndim=4] channel, uint32_t n osy = (sy + 1) // 2 osz = (sz + 1) // 2 osxy = osx * osy - ovoxels = osxy * osz * sw + ovoxels = osxy * osz * sw * sv tmp = accum - accum = accumulate_2x2x2[double,double](accum, sx, sy, sz, sw) + accum = accumulate_2x2x2[double,double](accum, sx, sy, sz, sw, sv) accumview = accum PyMem_Free(tmp) @@ -791,7 +802,7 @@ def _average_pooling_2x2x2_double(np.ndarray[double, ndim=4] channel, uint32_t n def mode_pooling_2x2(img, uint32_t num_mips=1): ndim = img.ndim - img = expand_dims(img, 4) + img = expand_dims(img, 5) results = [] for mip in range(num_mips): @@ -803,31 +814,33 @@ def mode_pooling_2x2(img, uint32_t num_mips=1): return results -def _mode_pooling_2x2_cpp(np.ndarray[NUMBER, ndim=4] img): +def _mode_pooling_2x2_cpp(np.ndarray[NUMBER, ndim=5] img): sx = img.shape[0] sy = img.shape[1] sz = img.shape[2] sw = img.shape[3] + sv = img.shape[4] - size = ( (sx+1) // 2, (sy+1) // 2, sz, sw ) + size = ( (sx+1) // 2, (sy+1) // 2, sz, sw, sv ) - cdef np.ndarray[NUMBER, ndim=4] oimg = np.zeros(size, dtype=img.dtype, order='F') + cdef np.ndarray[NUMBER, ndim=5] oimg = np.zeros(size, dtype=img.dtype, order='F') - cdef uint8_t[:,:,:,:] arr_memview8u_i - cdef uint16_t[:,:,:,:] arr_memview16u_i - cdef uint32_t[:,:,:,:] arr_memview32u_i - cdef uint64_t[:,:,:,:] arr_memview64u_i + cdef uint8_t[:,:,:,:,:] arr_memview8u_i + cdef uint16_t[:,:,:,:,:] arr_memview16u_i + cdef uint32_t[:,:,:,:,:] arr_memview32u_i + cdef uint64_t[:,:,:,:,:] arr_memview64u_i - cdef uint8_t[:,:,:,:] arr_memview8u_o - cdef uint16_t[:,:,:,:] arr_memview16u_o - cdef uint32_t[:,:,:,:] arr_memview32u_o - cdef uint64_t[:,:,:,:] arr_memview64u_o + cdef uint8_t[:,:,:,:,:] arr_memview8u_o + cdef uint16_t[:,:,:,:,:] arr_memview16u_o + cdef uint32_t[:,:,:,:,:] arr_memview32u_o + cdef uint64_t[:,:,:,:,:] arr_memview64u_o strides = np.array([ img.strides[0], img.strides[1], img.strides[2], img.strides[3], + img.strides[4], ]) strides //= sizeof(NUMBER) @@ -835,33 +848,33 @@ def _mode_pooling_2x2_cpp(np.ndarray[NUMBER, ndim=4] img): arr_memview8u_i = img.view(np.uint8) arr_memview8u_o = oimg.view(np.uint8) _mode_pooling_2x2[uint8_t]( - &arr_memview8u_i[0,0,0,0], &arr_memview8u_o[0,0,0,0], - sx, sy, sz, sw, - strides[0], strides[1], strides[2], strides[3] + &arr_memview8u_i[0,0,0,0,0], &arr_memview8u_o[0,0,0,0,0], + sx, sy, sz, sw, sv, + strides[0], strides[1], strides[2], strides[3], strides[4] ) elif img.dtype in (np.uint16, np.int16): arr_memview16u_i = img.view(np.uint16) arr_memview16u_o = oimg.view(np.uint16) _mode_pooling_2x2[uint16_t]( - &arr_memview16u_i[0,0,0,0], &arr_memview16u_o[0,0,0,0], - sx, sy, sz, sw, - strides[0], strides[1], strides[2], strides[3] + &arr_memview16u_i[0,0,0,0,0], &arr_memview16u_o[0,0,0,0,0], + sx, sy, sz, sw, sv, + strides[0], strides[1], strides[2], strides[3], strides[4] ) elif img.dtype in (np.uint32, np.int32, np.float32): arr_memview32u_i = img.view(np.uint32) arr_memview32u_o = oimg.view(np.uint32) _mode_pooling_2x2[uint32_t]( - &arr_memview32u_i[0,0,0,0], &arr_memview32u_o[0,0,0,0], - sx, sy, sz, sw, - strides[0], strides[1], strides[2], strides[3] + &arr_memview32u_i[0,0,0,0,0], &arr_memview32u_o[0,0,0,0,0], + sx, sy, sz, sw, sv, + strides[0], strides[1], strides[2], strides[3], strides[4] ) elif img.dtype in (np.uint64, np.int64, np.float64, np.csingle): arr_memview64u_i = img.view(np.uint64) arr_memview64u_o = oimg.view(np.uint64) _mode_pooling_2x2[uint64_t]( - &arr_memview64u_i[0,0,0,0], &arr_memview64u_o[0,0,0,0], - sx, sy, sz, sw, - strides[0], strides[1], strides[2], strides[3] + &arr_memview64u_i[0,0,0,0,0], &arr_memview64u_o[0,0,0,0,0], + sx, sy, sz, sw, sv, + strides[0], strides[1], strides[2], strides[3], strides[4] ) else: raise ValueError("{} not supported.".format(img.dtype)) @@ -870,7 +883,7 @@ def _mode_pooling_2x2_cpp(np.ndarray[NUMBER, ndim=4] img): def mode_pooling_2x2x2(img, uint32_t num_mips=1, sparse=False): ndim = img.ndim - img = expand_dims(img, 4) + img = expand_dims(img, 5) results = [] for mip in range(num_mips): @@ -882,56 +895,57 @@ def mode_pooling_2x2x2(img, uint32_t num_mips=1, sparse=False): return results -def _mode_pooling_2x2x2_helper(np.ndarray[NUMBER, ndim=4] img, sparse): +def _mode_pooling_2x2x2_helper(np.ndarray[NUMBER, ndim=5] img, sparse): sx = img.shape[0] sy = img.shape[1] sz = img.shape[2] sw = img.shape[3] + sv = img.shape[3] - size = ( (sx+1) // 2, (sy+1) // 2, (sz+1) // 2, sw ) + size = ( (sx+1) // 2, (sy+1) // 2, (sz+1) // 2, sw, sv ) - cdef np.ndarray[NUMBER, ndim=4] oimg = np.zeros(size, dtype=img.dtype, order='F') + cdef np.ndarray[NUMBER, ndim=5] oimg = np.zeros(size, dtype=img.dtype, order='F') - cdef uint8_t[:,:,:,:] arr_memview8u_i - cdef uint16_t[:,:,:,:] arr_memview16u_i - cdef uint32_t[:,:,:,:] arr_memview32u_i - cdef uint64_t[:,:,:,:] arr_memview64u_i + cdef uint8_t[:,:,:,:,:] arr_memview8u_i + cdef uint16_t[:,:,:,:,:] arr_memview16u_i + cdef uint32_t[:,:,:,:,:] arr_memview32u_i + cdef uint64_t[:,:,:,:,:] arr_memview64u_i - cdef uint8_t[:,:,:,:] arr_memview8u_o - cdef uint16_t[:,:,:,:] arr_memview16u_o - cdef uint32_t[:,:,:,:] arr_memview32u_o - cdef uint64_t[:,:,:,:] arr_memview64u_o + cdef uint8_t[:,:,:,:,:] arr_memview8u_o + cdef uint16_t[:,:,:,:,:] arr_memview16u_o + cdef uint32_t[:,:,:,:,:] arr_memview32u_o + cdef uint64_t[:,:,:,:,:] arr_memview64u_o if img.dtype in (np.uint8, np.int8): arr_memview8u_i = img.view(np.uint8) arr_memview8u_o = oimg.view(np.uint8) _mode_pooling_2x2x2[uint8_t]( - &arr_memview8u_i[0,0,0,0], &arr_memview8u_o[0,0,0,0], - sx, sy, sz, sw, + &arr_memview8u_i[0,0,0,0,0], &arr_memview8u_o[0,0,0,0,0], + sx, sy, sz, sw, sv, bool(sparse) ) elif img.dtype in (np.uint16, np.int16): arr_memview16u_i = img.view(np.uint16) arr_memview16u_o = oimg.view(np.uint16) _mode_pooling_2x2x2[uint16_t]( - &arr_memview16u_i[0,0,0,0], &arr_memview16u_o[0,0,0,0], - sx, sy, sz, sw, + &arr_memview16u_i[0,0,0,0,0], &arr_memview16u_o[0,0,0,0,0], + sx, sy, sz, sw, sv, bool(sparse) ) elif img.dtype in (np.uint32, np.int32, np.float32): arr_memview32u_i = img.view(np.uint32) arr_memview32u_o = oimg.view(np.uint32) _mode_pooling_2x2x2[uint32_t]( - &arr_memview32u_i[0,0,0,0], &arr_memview32u_o[0,0,0,0], - sx, sy, sz, sw, + &arr_memview32u_i[0,0,0,0,0], &arr_memview32u_o[0,0,0,0,0], + sx, sy, sz, sw, sv, bool(sparse) ) elif img.dtype in (np.uint64, np.int64, np.float64, np.csingle): arr_memview64u_i = img.view(np.uint64) arr_memview64u_o = oimg.view(np.uint64) _mode_pooling_2x2x2[uint64_t]( - &arr_memview64u_i[0,0,0,0], &arr_memview64u_o[0,0,0,0], - sx, sy, sz, sw, + &arr_memview64u_i[0,0,0,0,0], &arr_memview64u_o[0,0,0,0,0], + sx, sy, sz, sw, sv, bool(sparse) ) else: