store q hash table in shmem
comment out for now
Ahdhn committed Aug 15, 2023
1 parent 05963a6 commit deb9d44
Showing 7 changed files with 76 additions and 99 deletions.
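Taken together, the changes stage the q patch's LP hash table in shared memory during migration so that lookups hit shared rather than global memory; per the commit message, the new `m_s_table_q` / `m_s_table_stash_q` code paths are commented out for now. The stand-alone CUDA sketch below shows only the general stage-then-query pattern; the `Pair` struct, the brute-force probe, and the kernel are simplified stand-ins, not RXMesh's `LPPair` / `LPHashTable` API.

```cuda
// Minimal sketch of the stage-then-query pattern: copy a small lookup table
// from global memory into shared memory once per block, sync, then answer all
// queries from the shared copy. Types and the linear probe are illustrative
// stand-ins, not RXMesh's LPHashTable implementation.
#include <cstdint>
#include <cstdio>

struct Pair {
    uint32_t key;
    uint32_t value;
};

__global__ void lookup_with_shared_table(const Pair*     g_table,
                                         uint32_t        table_size,
                                         const uint32_t* queries,
                                         uint32_t*       results,
                                         uint32_t        num_queries)
{
    extern __shared__ Pair s_table[];  // sized at launch as table_size * sizeof(Pair)

    // cooperative copy: threads stride over the table
    for (uint32_t i = threadIdx.x; i < table_size; i += blockDim.x) {
        s_table[i] = g_table[i];
    }
    __syncthreads();

    // every query is now served from shared memory
    for (uint32_t q = threadIdx.x; q < num_queries; q += blockDim.x) {
        uint32_t found = 0xFFFFFFFFu;
        for (uint32_t i = 0; i < table_size; ++i) {
            if (s_table[i].key == queries[q]) {
                found = s_table[i].value;
                break;
            }
        }
        results[q] = found;
    }
}

int main()
{
    const uint32_t n            = 4;
    Pair           h_table[n]   = {{1, 10}, {2, 20}, {3, 30}, {4, 40}};
    uint32_t       h_queries[n] = {3, 1, 4, 9};
    uint32_t       h_results[n] = {};

    Pair*     d_table;
    uint32_t *d_queries, *d_results;
    cudaMalloc(&d_table, n * sizeof(Pair));
    cudaMalloc(&d_queries, n * sizeof(uint32_t));
    cudaMalloc(&d_results, n * sizeof(uint32_t));
    cudaMemcpy(d_table, h_table, n * sizeof(Pair), cudaMemcpyHostToDevice);
    cudaMemcpy(d_queries, h_queries, n * sizeof(uint32_t), cudaMemcpyHostToDevice);

    lookup_with_shared_table<<<1, 32, n * sizeof(Pair)>>>(
        d_table, n, d_queries, d_results, n);
    cudaMemcpy(h_results, d_results, n * sizeof(uint32_t), cudaMemcpyDeviceToHost);

    for (uint32_t i = 0; i < n; ++i) {
        printf("query %u -> %u\n", h_queries[i], h_results[i]);  // key 9 prints the sentinel
    }
    cudaFree(d_table);
    cudaFree(d_queries);
    cudaFree(d_results);
    return 0;
}
```

In the real code, a hashed probe with a stash replaces the brute-force scan, and the commented-out `lp_v.load_in_shared_memory(m_s_table_q, true, m_s_table_stash_q)` calls below would play the role of the cooperative copy.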
16 changes: 4 additions & 12 deletions include/rxmesh/cavity_manager.cuh
@@ -496,18 +496,6 @@ struct CavityManager
* the lid lives in src_patch and we want to find the corresponding local
* index in dest_patch
*/
template <typename HandleT>
__device__ __inline__ uint16_t find_copy(
uint16_t& lid,
uint32_t& src_patch,
const uint16_t dest_patch_num_elements,
const Bitmask& dest_patch_owned_mask,
const Bitmask& dest_patch_active_mask,
const Bitmask& dest_in_cavity,
const LPPair* s_table,
const LPPair* s_stash);


template <typename HandleT>
__device__ __inline__ uint16_t find_copy(
uint16_t& lid,
@@ -690,6 +678,10 @@ struct CavityManager

bool* m_s_should_slice;
ShmemMutex m_s_patch_stash_mutex;

//LPPair* m_s_table_q;
//LPPair* m_s_table_stash_q;
//uint32_t m_s_table_q_size;
};

} // namespace rxmesh
111 changes: 39 additions & 72 deletions include/rxmesh/cavity_manager_impl.cuh
@@ -228,6 +228,17 @@ CavityManager<blockThreads, cop>::alloc_shared_memory(
// cavity boundary edges
m_s_cavity_boundary_edges = shrd_alloc.alloc<uint16_t>(m_s_num_edges[0]);

// q hash table
// m_s_table_q_size = std::max(
// std::max(m_context.m_max_lp_capacity_v,
// m_context.m_max_lp_capacity_e), m_context.m_max_lp_capacity_f);
// m_s_table_q = shrd_alloc.alloc<LPPair>(m_s_table_q_size);

//__shared__ LPPair st_q[LPHashTable::stash_size];
//m_s_table_stash_q = st_q;
//fill_n<blockThreads>(
// m_s_table_stash_q, uint16_t(LPHashTable::stash_size), LPPair());

// lp stash
__shared__ LPPair st_v[LPHashTable::stash_size];
m_s_table_stash_v = st_v;
@@ -1681,6 +1692,12 @@ CavityManager<blockThreads, cop>::soft_migrate_from_patch(
m_correspondence_size_vf,
m_s_table_v,
m_s_table_stash_v);

// assert(m_s_table_q_size >=
// m_context.m_patches_info[q].lp_v.get_capacity());
// m_context.m_patches_info[q].lp_v.load_in_shared_memory(
// m_s_table_q, true, m_s_table_stash_q);

block.sync();

// make sure there is a copy in p for any vertex in
@@ -1842,6 +1859,12 @@ __device__ __inline__ bool CavityManager<blockThreads, cop>::migrate_from_patch(
m_correspondence_size_vf,
m_s_table_v,
m_s_table_stash_v);

// assert(m_s_table_q_size >=
// m_context.m_patches_info[q].lp_v.get_capacity());
// m_context.m_patches_info[q].lp_v.load_in_shared_memory(
// m_s_table_q, true, m_s_table_stash_q);

block.sync();

// 3. make sure there is a copy in p for any vertex in
@@ -1895,6 +1918,11 @@ __device__ __inline__ bool CavityManager<blockThreads, cop>::migrate_from_patch(
m_correspondence_size_e,
m_s_table_e,
m_s_table_stash_e);
// assert(m_s_table_q_size >=
// m_context.m_patches_info[q].lp_e.get_capacity());
// m_context.m_patches_info[q].lp_e.load_in_shared_memory(
// m_s_table_q, true, m_s_table_stash_q);

block.sync();

// same story as with the loop that adds vertices
@@ -2027,6 +2055,12 @@ __device__ __inline__ bool CavityManager<blockThreads, cop>::migrate_from_patch(
m_correspondence_size_vf,
m_s_table_f,
m_s_table_stash_f);

// assert(m_s_table_q_size >=
// m_context.m_patches_info[q].lp_f.get_capacity());
// m_context.m_patches_info[q].lp_f.load_in_shared_memory(
// m_s_table_q, true, m_s_table_stash_q);

block.sync();

// same story as with the loop that adds vertices
@@ -2403,77 +2437,6 @@ __device__ __inline__ uint16_t CavityManager<blockThreads, cop>::find_copy_face(
}


template <uint32_t blockThreads, CavityOp cop>
template <typename HandleT>
__device__ __inline__ uint16_t CavityManager<blockThreads, cop>::find_copy(
uint16_t& lid,
uint32_t& src_patch,
const uint16_t dest_patch_num_elements,
const Bitmask& dest_patch_owned_mask,
const Bitmask& dest_patch_active_mask,
const Bitmask& dest_in_cavity,
const LPPair* s_table,
const LPPair* s_stash)
{

assert(
!m_context.m_patches_info[src_patch].is_deleted(HandleT::LocalT(lid)));

// First check if lid is owned by src_patch. If not, then map it to its
// owner patch and local index in it

if (!m_context.m_patches_info[src_patch].is_owned(HandleT::LocalT(lid))) {
HandleT owner =
m_context.m_patches_info[src_patch].find<HandleT>({lid});
src_patch = owner.patch_id();
lid = owner.local_id();
} else {
if constexpr (std::is_same_v<HandleT, EdgeHandle>) {
assert(lid < m_correspondence_size_e);
return m_s_q_correspondence_e[lid];
} else {
assert(lid < m_correspondence_size_vf);
return m_s_q_correspondence_vf[lid];
}
}

// if the owner src_patch is the same as the patch associated with this
// cavity, the lid is the local index we are looking for
if (src_patch == m_patch_info.patch_id) {
return lid;
}

// otherwise, we do a search over the not-owned elements in the dest
// patch. For every not-owned element, we map it to its owner patch and
// check against lid-src_patch pair
for (uint16_t i = 0; i < dest_patch_num_elements; ++i) {
assert(i < dest_patch_owned_mask.size());
assert(i < dest_patch_active_mask.size());
assert(i < dest_in_cavity.size());
if (!dest_patch_owned_mask(i) &&
(dest_patch_active_mask(i) || dest_in_cavity(i))) {

const HandleT handle = m_patch_info.find<HandleT>(
i, s_table, s_stash, m_s_patch_stash);

// These assertions do not work anymore since we change the
// active and owned masks when we add new elements. So, a thread
// A might set the bit in the active mask and reset the owned bit
// for element X before adding it to the hashtable, leading to
// another thread B looking for it without finding it

// assert(handle.is_valid());
// assert(handle.patch_id() != INVALID32);
// assert(handle.local_id() != INVALID16);

if (handle.patch_id() == src_patch && handle.local_id() == lid) {
return i;
}
}
}
return INVALID16;
}

template <uint32_t blockThreads, CavityOp cop>
template <typename HandleT>
__device__ __inline__ uint16_t CavityManager<blockThreads, cop>::find_copy(
@@ -2507,7 +2470,11 @@ __device__ __inline__ uint16_t CavityManager<blockThreads, cop>::find_copy(
const uint16_t lid_in(lid);
HandleT owner;
if (!m_context.m_patches_info[src_patch].is_owned(HandleT::LocalT(lid))) {
owner = m_context.m_patches_info[src_patch].find<HandleT>({lid});
owner = m_context.m_patches_info[src_patch].find<HandleT>(
{lid} /*, m_s_table_q, m_s_table_stash_q */);

assert(owner.is_valid());

// if the owner src_patch is the same as the patch associated with this
// cavity, the lid is the local index we are looking for
src_patch = owner.patch_id();
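The deleted `find_copy` overload mixed two lookup strategies: when `lid` is already owned by `src_patch`, it returned the answer directly from the per-q correspondence arrays (`m_s_q_correspondence_e` / `m_s_q_correspondence_vf`); otherwise it remapped `lid` to its owner and fell back to scanning every not-owned element of the destination patch, resolving each one's owner and comparing against the (`src_patch`, `lid`) pair. A stand-alone sketch of the two strategies, using stand-in containers rather than RXMesh's bitmasks and hash tables:

```cpp
// Contrast of the two lookup strategies in find_copy: a direct correspondence
// array (local index in q -> local index in p) versus a scan over p's
// not-owned elements that resolves each one's owner. Names and containers are
// illustrative stand-ins only.
#include <cstdint>
#include <cstdio>
#include <vector>

constexpr uint16_t INVALID16 = 0xFFFF;

// direct lookup: constant time per element
uint16_t find_copy_via_correspondence(uint16_t                     lid_in_q,
                                      const std::vector<uint16_t>& q_to_p)
{
    return (lid_in_q < q_to_p.size()) ? q_to_p[lid_in_q] : INVALID16;
}

// fallback scan: linear in the destination patch's element count
struct OwnerRef {
    uint32_t patch;  // owner patch of a not-owned element in p
    uint16_t lid;    // local index inside that owner patch
};

uint16_t find_copy_via_scan(uint32_t                     src_patch,
                            uint16_t                     lid,
                            const std::vector<OwnerRef>& p_not_owned)
{
    for (size_t i = 0; i < p_not_owned.size(); ++i) {
        if (p_not_owned[i].patch == src_patch && p_not_owned[i].lid == lid) {
            return static_cast<uint16_t>(i);
        }
    }
    return INVALID16;
}

int main()
{
    std::vector<uint16_t> q_to_p      = {5, INVALID16, 9};
    std::vector<OwnerRef> p_not_owned = {{3, 11}, {7, 2}};

    printf("%d\n", find_copy_via_correspondence(2, q_to_p));  // 9
    printf("%d\n", find_copy_via_scan(7, 2, p_not_owned));    // 1
    return 0;
}
```

The correspondence lookup is constant time, while the fallback scan is linear in the destination patch's element count.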
21 changes: 12 additions & 9 deletions include/rxmesh/patch_info.h
@@ -48,11 +48,11 @@ struct ALIGN(16) PatchInfo
faces_capacity(nullptr),
patch_id(INVALID32){};

__device__ __host__ PatchInfo(const PatchInfo& other) = default;
__device__ __host__ PatchInfo(PatchInfo&&) = default;
__device__ __host__ PatchInfo& operator=(const PatchInfo&) = default;
__device__ __host__ PatchInfo& operator=(PatchInfo&&) = default;
__device__ __host__ ~PatchInfo() = default;
__device__ __host__ PatchInfo(const PatchInfo& other) = default;
__device__ __host__ PatchInfo(PatchInfo&&) = default;
__device__ __host__ PatchInfo& operator=(const PatchInfo&) = default;
__device__ __host__ PatchInfo& operator=(PatchInfo&&) = default;
__device__ __host__ ~PatchInfo() = default;

// The topology information: edge incident vertices and face incident edges
LocalVertexT* ev;
@@ -145,17 +145,20 @@ struct ALIGN(16) PatchInfo
}

template <typename HandleT>
__device__ __host__ __inline__ HandleT find(const LPPair::KeyT key) const
__device__ __host__ __inline__ HandleT find(
const LPPair::KeyT key,
const LPPair* table = nullptr,
const LPPair* stash = nullptr) const
{
LPPair lp;
if constexpr (std::is_same_v<HandleT, VertexHandle>) {
lp = lp_v.find(key, nullptr, nullptr);
lp = lp_v.find(key, table, stash);
}
if constexpr (std::is_same_v<HandleT, EdgeHandle>) {
lp = lp_e.find(key, nullptr, nullptr);
lp = lp_e.find(key, table, stash);
}
if constexpr (std::is_same_v<HandleT, FaceHandle>) {
lp = lp_f.find(key, nullptr, nullptr);
lp = lp_f.find(key, table, stash);
}

// assert(!lp.is_sentinel());
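`PatchInfo::find` now takes optional `table` / `stash` pointers that default to `nullptr` and are forwarded to `lp_v` / `lp_e` / `lp_f.find()`, so existing call sites keep the previous (global-memory) behavior while a caller holding a shared-memory copy of the table can pass it in. A small host-side sketch of the same defaulted-pointer pattern, with made-up stand-in types and the stash omitted:

```cpp
// Illustrative stand-ins only: 'Table' mimics the idea of a find() that can
// read from an optional alternative backing array, and 'Info' mimics
// PatchInfo::find forwarding a defaulted pointer.
#include <cstdint>
#include <cstdio>
#include <vector>

struct Entry {
    uint32_t key;
    uint32_t value;
};

struct Table {
    std::vector<Entry> global_store;  // stands in for the table's own storage

    // if 'alt' is non-null, look there instead of the table's own storage
    uint32_t find(uint32_t key, const Entry* alt, uint32_t alt_size) const
    {
        const Entry* src  = alt ? alt : global_store.data();
        uint32_t     size = alt ? alt_size : uint32_t(global_store.size());
        for (uint32_t i = 0; i < size; ++i) {
            if (src[i].key == key) {
                return src[i].value;
            }
        }
        return 0xFFFFFFFFu;
    }
};

struct Info {
    Table lp;

    // defaulted pointer keeps old call sites, info.find(k), unchanged
    uint32_t find(uint32_t key,
                  const Entry* table      = nullptr,
                  uint32_t     table_size = 0) const
    {
        return lp.find(key, table, table_size);
    }
};

int main()
{
    Info  info{Table{{{7, 70}, {8, 80}}}};
    Entry shared_copy[] = {{7, 700}};  // pretend this lives in shared memory

    printf("%u\n", info.find(7));                  // 70: global path (old behavior)
    printf("%u\n", info.find(7, shared_copy, 1));  // 700: caller-supplied copy
    return 0;
}
```

Calling `info.find(k)` reproduces the old path, while `info.find(k, shared_copy, 1)` answers from the caller-supplied buffer; in the actual change, the commented-out shared-memory staging in `cavity_manager_impl.cuh` would be the source of such a buffer.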
2 changes: 1 addition & 1 deletion include/rxmesh/patch_scheduler.cuh
@@ -2,7 +2,7 @@

// for debugging, this macro lets the scheduler generate only one valid patch
// (corresponding to blockIdx.x)
//#define PROCESS_SINGLE_PATCH
// #define PROCESS_SINGLE_PATCH

// inspired by / taken from
// https://github.com/GPUPeople/Ouroboros/blob/9153c55abffb3bceb5aea4028dfcc00439b046d5/include/device/queues/Queue.h
15 changes: 14 additions & 1 deletion include/rxmesh/rxmesh.cpp
@@ -791,7 +791,8 @@ void RXMesh::build_device()
m_d_patches_info[p]);
}

for (int p = 0; p < static_cast<int>(get_num_patches()); ++p) {

for (uint32_t p = 0; p < get_num_patches(); ++p) {
m_max_capacity_lp_v = std::max(m_max_capacity_lp_v,
m_h_patches_info[p].lp_v.get_capacity());

@@ -1175,5 +1176,17 @@ void RXMesh::allocate_extra_patches()
m_h_patches_info[p],
m_d_patches_info[p]);
}


for (uint32_t p = get_num_patches(); p < get_max_num_patches(); ++p) {
m_max_capacity_lp_v = std::max(m_max_capacity_lp_v,
m_h_patches_info[p].lp_v.get_capacity());

m_max_capacity_lp_e = std::max(m_max_capacity_lp_e,
m_h_patches_info[p].lp_e.get_capacity());

m_max_capacity_lp_f = std::max(m_max_capacity_lp_f,
m_h_patches_info[p].lp_f.get_capacity());
}
}
} // namespace rxmesh
6 changes: 6 additions & 0 deletions include/rxmesh/rxmesh_dynamic.h
@@ -453,6 +453,12 @@ class RXMeshDynamic : public RXMeshStatic
cavity_size_shmem += (this->m_max_faces_per_patch / 2) * sizeof(int) +
ShmemAllocator::default_alignment;

size_t q_lp_shmem = std::max(max_lp_hashtable_capacity<LocalVertexT>(),
max_lp_hashtable_capacity<LocalEdgeT>());

q_lp_shmem = std::max(q_lp_shmem,
size_t(max_lp_hashtable_capacity<LocalFaceT>())) *
sizeof(LPPair);

// active, owned, migrate(for vertices only), src bitmask (for vertices
// and edges only), src connect (for vertices and edges only), ownership
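The added request sizes one buffer large enough for whichever of the three LP hash tables (vertex, edge, or face) has the largest capacity, then converts elements to bytes with `sizeof(LPPair)`. The snippet below only mirrors that max-then-multiply arithmetic; the capacities are invented and the 8-byte `LPPair` size is an assumption, since the real values come from `max_lp_hashtable_capacity<>()`.

```cpp
// Illustration of the q_lp_shmem sizing with made-up capacities; only the
// max-then-multiply arithmetic mirrors the change above.
#include <algorithm>
#include <cstddef>
#include <cstdio>

int main()
{
    const size_t cap_v      = 896;   // hypothetical vertex LP table capacity
    const size_t cap_e      = 1664;  // hypothetical edge LP table capacity
    const size_t cap_f      = 1024;  // hypothetical face LP table capacity
    const size_t pair_bytes = 8;     // assumed sizeof(LPPair)

    const size_t q_lp_shmem = std::max({cap_v, cap_e, cap_f}) * pair_bytes;
    printf("extra shared memory per block: %zu bytes\n", q_lp_shmem);  // 13312
    return 0;
}
```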
4 changes: 0 additions & 4 deletions include/rxmesh/util/macros.h
@@ -43,11 +43,7 @@ inline void HandleError(cudaError_t err, const char* file, int line)
if (err != cudaSuccess) {
Log::get_logger()->error("Line {} File {}", line, file);
Log::get_logger()->error("CUDA ERROR: {}", cudaGetErrorString(err));
#ifdef _WIN32
system("pause");
#else
exit(EXIT_FAILURE);
#endif
}
}
#define CUDA_ERROR(err) (HandleError(err, __FILE__, __LINE__))
