From 42a59a6ea83c03225dcb45879b8b920d9b92a023 Mon Sep 17 00:00:00 2001 From: Robert Remen Date: Sun, 8 Sep 2024 17:12:30 +0200 Subject: [PATCH] feat: add distribute function (#14) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What ❔ This PR adds the distribute function. ## Checklist - [x] PR title corresponds to the body of PR (we generate changelog entries from PRs). - [x] Documentation comments have been added / updated. --- src/bellman-cuda.cu | 5 +++++ src/bellman-cuda.h | 8 ++++++++ src/pn_kernels.cu | 17 +++++++++++++++++ src/pn_kernels.cuh | 2 ++ 4 files changed, 32 insertions(+) diff --git a/src/bellman-cuda.cu b/src/bellman-cuda.cu index c4916fd..65e823e 100644 --- a/src/bellman-cuda.cu +++ b/src/bellman-cuda.cu @@ -280,6 +280,11 @@ bc_error pn_set_values_from_packed_bits(void *values, const void *packet_bits, c static_cast(stream.handle))); } +bc_error pn_distribute_values(const void *src, void *dst, const unsigned count, const unsigned stride, bc_stream stream) { + return static_cast(pn::distribute_values(static_cast(src), static_cast(dst), count, stride, + static_cast(stream.handle))); +} + bc_error pn_tear_down() { return static_cast(pn::tear_down()); }; bc_error msm_set_up() { return static_cast(msm::set_up()); } diff --git a/src/bellman-cuda.h b/src/bellman-cuda.h index e618a37..4528287 100644 --- a/src/bellman-cuda.h +++ b/src/bellman-cuda.h @@ -458,6 +458,14 @@ bc_error pn_generate_permutation_polynomials(generate_permutation_polynomials_co // stream - Stream on which this operation will be scheduled bc_error pn_set_values_from_packed_bits(void *values, const void *packet_bits, unsigned count, bc_stream stream); +// Distribute field element values with a stride +// src - device pointer to the vector of field elements from where the values will be read +// dst - device pointer to the vector of field elements to where the results will be written +// count - number of values to distribute +// stride - stride with which the values will be distributed +// stream - Stream on which this operation will be scheduled +bc_error pn_distribute_values(const void *src, void *dst, unsigned count, unsigned stride, bc_stream stream); + // release all resources associated with the internal state for polynomial computations bc_error pn_tear_down(); diff --git a/src/pn_kernels.cu b/src/pn_kernels.cu index 9b9b205..77d2fb4 100644 --- a/src/pn_kernels.cu +++ b/src/pn_kernels.cu @@ -141,4 +141,21 @@ cudaError_t set_values_from_packed_bits(fd_q::storage *values, const unsigned *p return cudaGetLastError(); } +__global__ void distribute_values_kernel(const fd_q::storage *src, fd_q::storage *dst, const unsigned count, const unsigned stride) { + typedef fd_q::storage storage; + const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x; + if (gid >= count) + return; + const auto value = memory::load(src + gid); + memory::store(dst + gid * stride, value); +} + +cudaError_t distribute_values(const fd_q::storage *src, fd_q::storage *dst, const unsigned count, const unsigned stride, cudaStream_t stream) { + const unsigned threads_per_block = 128; + const dim3 block_dim = count < threads_per_block ? count : threads_per_block; + const dim3 grid_dim = (count - 1) / block_dim.x + 1; + distribute_values_kernel<<>>(src, dst, count, stride); + return cudaGetLastError(); +} + } // namespace pn \ No newline at end of file diff --git a/src/pn_kernels.cuh b/src/pn_kernels.cuh index 5b5cff0..91fe081 100644 --- a/src/pn_kernels.cuh +++ b/src/pn_kernels.cuh @@ -18,4 +18,6 @@ cudaError_t generate_permutation_matrix(fd_q::storage *values, const fd_q::stora cudaError_t set_values_from_packed_bits(fd_q::storage *values, const unsigned *packet_bits, unsigned count, cudaStream_t stream); +cudaError_t distribute_values(const fd_q::storage *src, fd_q::storage *dst, unsigned count, unsigned stride, cudaStream_t stream); + } // namespace pn \ No newline at end of file