store q hash table in shmem
comment out for now
Ahdhn committed Aug 15, 2023
1 parent 05963a6 commit deb9d44
Showing 7 changed files with 76 additions and 99 deletions.
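Taken together, the changes stage the q patch's LP hash table in shared memory during migration so that lookups hit shared rather than global memory; per the commit message, the new `m_s_table_q` / `m_s_table_stash_q` code paths are commented out for now. The stand-alone CUDA sketch below shows only the general stage-then-query pattern; the `Pair` struct, the brute-force probe, and the kernel are simplified stand-ins, not RXMesh's `LPPair` / `LPHashTable` API.

```cuda
// Minimal sketch of the stage-then-query pattern: copy a small lookup table
// from global memory into shared memory once per block, sync, then answer all
// queries from the shared copy. Types and the linear probe are illustrative
// stand-ins, not RXMesh's LPHashTable implementation.
#include <cstdint>
#include <cstdio>

struct Pair {
    uint32_t key;
    uint32_t value;
};

__global__ void lookup_with_shared_table(const Pair*     g_table,
                                         uint32_t        table_size,
                                         const uint32_t* queries,
                                         uint32_t*       results,
                                         uint32_t        num_queries)
{
    extern __shared__ Pair s_table[];  // sized at launch as table_size * sizeof(Pair)

    // cooperative copy: threads stride over the table
    for (uint32_t i = threadIdx.x; i < table_size; i += blockDim.x) {
        s_table[i] = g_table[i];
    }
    __syncthreads();

    // every query is now served from shared memory
    for (uint32_t q = threadIdx.x; q < num_queries; q += blockDim.x) {
        uint32_t found = 0xFFFFFFFFu;
        for (uint32_t i = 0; i < table_size; ++i) {
            if (s_table[i].key == queries[q]) {
                found = s_table[i].value;
                break;
            }
        }
        results[q] = found;
    }
}

int main()
{
    const uint32_t n            = 4;
    Pair           h_table[n]   = {{1, 10}, {2, 20}, {3, 30}, {4, 40}};
    uint32_t       h_queries[n] = {3, 1, 4, 9};
    uint32_t       h_results[n] = {};

    Pair*     d_table;
    uint32_t *d_queries, *d_results;
    cudaMalloc(&d_table, n * sizeof(Pair));
    cudaMalloc(&d_queries, n * sizeof(uint32_t));
    cudaMalloc(&d_results, n * sizeof(uint32_t));
    cudaMemcpy(d_table, h_table, n * sizeof(Pair), cudaMemcpyHostToDevice);
    cudaMemcpy(d_queries, h_queries, n * sizeof(uint32_t), cudaMemcpyHostToDevice);

    lookup_with_shared_table<<<1, 32, n * sizeof(Pair)>>>(
        d_table, n, d_queries, d_results, n);
    cudaMemcpy(h_results, d_results, n * sizeof(uint32_t), cudaMemcpyDeviceToHost);

    for (uint32_t i = 0; i < n; ++i) {
        printf("query %u -> %u\n", h_queries[i], h_results[i]);  // key 9 prints the sentinel
    }
    cudaFree(d_table);
    cudaFree(d_queries);
    cudaFree(d_results);
    return 0;
}
```

In the real code, a hashed probe with a stash replaces the brute-force scan, and the commented-out `lp_v.load_in_shared_memory(m_s_table_q, true, m_s_table_stash_q)` calls below would play the role of the cooperative copy.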
16 changes: 4 additions & 12 deletions include/rxmesh/cavity_manager.cuh
@@ -496,18 +496,6 @@ struct CavityManager
* the lid lives in src_patch and we want to find the corresponding local
* index in dest_patch
*/
template <typename HandleT>
__device__ __inline__ uint16_t find_copy(
uint16_t& lid,
uint32_t& src_patch,
const uint16_t dest_patch_num_elements,
const Bitmask& dest_patch_owned_mask,
const Bitmask& dest_patch_active_mask,
const Bitmask& dest_in_cavity,
const LPPair* s_table,
const LPPair* s_stash);


template <typename HandleT>
__device__ __inline__ uint16_t find_copy(
uint16_t& lid,
@@ -690,6 +678,10 @@ struct CavityManager

bool* m_s_should_slice;
ShmemMutex m_s_patch_stash_mutex;

//LPPair* m_s_table_q;
//LPPair* m_s_table_stash_q;
//uint32_t m_s_table_q_size;
};

} // namespace rxmesh
111 changes: 39 additions & 72 deletions include/rxmesh/cavity_manager_impl.cuh
@@ -228,6 +228,17 @@ CavityManager<blockThreads, cop>::alloc_shared_memory(
// cavity boundary edges
m_s_cavity_boundary_edges = shrd_alloc.alloc<uint16_t>(m_s_num_edges[0]);

// q hash table
// m_s_table_q_size = std::max(
// std::max(m_context.m_max_lp_capacity_v,
// m_context.m_max_lp_capacity_e), m_context.m_max_lp_capacity_f);
// m_s_table_q = shrd_alloc.alloc<LPPair>(m_s_table_q_size);

//__shared__ LPPair st_q[LPHashTable::stash_size];
//m_s_table_stash_q = st_q;
//fill_n<blockThreads>(
// m_s_table_stash_q, uint16_t(LPHashTable::stash_size), LPPair());

// lp stash
__shared__ LPPair st_v[LPHashTable::stash_size];
m_s_table_stash_v = st_v;
@@ -1681,6 +1692,12 @@ CavityManager<blockThreads, cop>::soft_migrate_from_patch(
m_correspondence_size_vf,
m_s_table_v,
m_s_table_stash_v);

// assert(m_s_table_q_size >=
// m_context.m_patches_info[q].lp_v.get_capacity());
// m_context.m_patches_info[q].lp_v.load_in_shared_memory(
// m_s_table_q, true, m_s_table_stash_q);

block.sync();

// make sure there is a copy in p for any vertex in
@@ -1842,6 +1859,12 @@ __device__ __inline__ bool CavityManager<blockThreads, cop>::migrate_from_patch(
m_correspondence_size_vf,
m_s_table_v,
m_s_table_stash_v);

// assert(m_s_table_q_size >=
// m_context.m_patches_info[q].lp_v.get_capacity());
// m_context.m_patches_info[q].lp_v.load_in_shared_memory(
// m_s_table_q, true, m_s_table_stash_q);

block.sync();

// 3. make sure there is a copy in p for any vertex in
@@ -1895,6 +1918,11 @@ __device__ __inline__ bool CavityManager<blockThreads, cop>::migrate_from_patch(
m_correspondence_size_e,
m_s_table_e,
m_s_table_stash_e);
// assert(m_s_table_q_size >=
// m_context.m_patches_info[q].lp_e.get_capacity());
// m_context.m_patches_info[q].lp_e.load_in_shared_memory(
// m_s_table_q, true, m_s_table_stash_q);

block.sync();

// same story as with the loop that adds vertices
@@ -2027,6 +2055,12 @@ __device__ __inline__ bool CavityManager<blockThreads, cop>::migrate_from_patch(
m_correspondence_size_vf,
m_s_table_f,
m_s_table_stash_f);

// assert(m_s_table_q_size >=
// m_context.m_patches_info[q].lp_f.get_capacity());
// m_context.m_patches_info[q].lp_f.load_in_shared_memory(
// m_s_table_q, true, m_s_table_stash_q);

block.sync();

// same story as with the loop that adds vertices
@@ -2403,77 +2437,6 @@ __device__ __inline__ uint16_t CavityManager<blockThreads, cop>::find_copy_face(
}


template <uint32_t blockThreads, CavityOp cop>
template <typename HandleT>
__device__ __inline__ uint16_t CavityManager<blockThreads, cop>::find_copy(
uint16_t& lid,
uint32_t& src_patch,
const uint16_t dest_patch_num_elements,
const Bitmask& dest_patch_owned_mask,
const Bitmask& dest_patch_active_mask,
const Bitmask& dest_in_cavity,
const LPPair* s_table,
const LPPair* s_stash)
{

assert(
!m_context.m_patches_info[src_patch].is_deleted(HandleT::LocalT(lid)));

// First check if lid is owned by src_patch. If not, then map it to its
// owner patch and local index in it

if (!m_context.m_patches_info[src_patch].is_owned(HandleT::LocalT(lid))) {
HandleT owner =
m_context.m_patches_info[src_patch].find<HandleT>({lid});
src_patch = owner.patch_id();
lid = owner.local_id();
} else {
if constexpr (std::is_same_v<HandleT, EdgeHandle>) {
assert(lid < m_correspondence_size_e);
return m_s_q_correspondence_e[lid];
} else {
assert(lid < m_correspondence_size_vf);
return m_s_q_correspondence_vf[lid];
}
}

// if the owner src_patch is the same as the patch associated with this
// cavity, the lid is the local index we are looking for
if (src_patch == m_patch_info.patch_id) {
return lid;
}

// otherwise, we do a search over the not-owned elements in the dest
// patch. For every not-owned element, we map it to its owner patch and
// check against lid-src_patch pair
for (uint16_t i = 0; i < dest_patch_num_elements; ++i) {
assert(i < dest_patch_owned_mask.size());
assert(i < dest_patch_active_mask.size());
assert(i < dest_in_cavity.size());
if (!dest_patch_owned_mask(i) &&
(dest_patch_active_mask(i) || dest_in_cavity(i))) {

const HandleT handle = m_patch_info.find<HandleT>(
i, s_table, s_stash, m_s_patch_stash);

// These assertions do not work anymore since we change the
// active and owned masks when we add new elements. So, a thread
// A might set the bit in the active mask and reset the owned bit
// for element X before adding it to the hashtable, leading to
// another thread B looking for it without finding it

// assert(handle.is_valid());
// assert(handle.patch_id() != INVALID32);
// assert(handle.local_id() != INVALID16);

if (handle.patch_id() == src_patch && handle.local_id() == lid) {
return i;
}
}
}
return INVALID16;
}

template <uint32_t blockThreads, CavityOp cop>
template <typename HandleT>
__device__ __inline__ uint16_t CavityManager<blockThreads, cop>::find_copy(
@@ -2507,7 +2470,11 @@ __device__ __inline__ uint16_t CavityManager<blockThreads, cop>::find_copy(
const uint16_t lid_in(lid);
HandleT owner;
if (!m_context.m_patches_info[src_patch].is_owned(HandleT::LocalT(lid))) {
owner = m_context.m_patches_info[src_patch].find<HandleT>({lid});
owner = m_context.m_patches_info[src_patch].find<HandleT>(
{lid} /*, m_s_table_q, m_s_table_stash_q */);

assert(owner.is_valid());

// if the owner src_patch is the same as the patch associated with this
// cavity, the lid is the local index we are looking for
src_patch = owner.patch_id();
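The deleted `find_copy` overload mixed two lookup strategies: when `lid` is already owned by `src_patch`, it returned the answer directly from the per-q correspondence arrays (`m_s_q_correspondence_e` / `m_s_q_correspondence_vf`); otherwise it remapped `lid` to its owner and fell back to scanning every not-owned element of the destination patch, resolving each one's owner and comparing against the (`src_patch`, `lid`) pair. A stand-alone sketch of the two strategies, using stand-in containers rather than RXMesh's bitmasks and hash tables:

```cpp
// Contrast of the two lookup strategies in find_copy: a direct correspondence
// array (local index in q -> local index in p) versus a scan over p's
// not-owned elements that resolves each one's owner. Names and containers are
// illustrative stand-ins only.
#include <cstdint>
#include <cstdio>
#include <vector>

constexpr uint16_t INVALID16 = 0xFFFF;

// direct lookup: constant time per element
uint16_t find_copy_via_correspondence(uint16_t                     lid_in_q,
                                      const std::vector<uint16_t>& q_to_p)
{
    return (lid_in_q < q_to_p.size()) ? q_to_p[lid_in_q] : INVALID16;
}

// fallback scan: linear in the destination patch's element count
struct OwnerRef {
    uint32_t patch;  // owner patch of a not-owned element in p
    uint16_t lid;    // local index inside that owner patch
};

uint16_t find_copy_via_scan(uint32_t                     src_patch,
                            uint16_t                     lid,
                            const std::vector<OwnerRef>& p_not_owned)
{
    for (size_t i = 0; i < p_not_owned.size(); ++i) {
        if (p_not_owned[i].patch == src_patch && p_not_owned[i].lid == lid) {
            return static_cast<uint16_t>(i);
        }
    }
    return INVALID16;
}

int main()
{
    std::vector<uint16_t> q_to_p      = {5, INVALID16, 9};
    std::vector<OwnerRef> p_not_owned = {{3, 11}, {7, 2}};

    printf("%d\n", find_copy_via_correspondence(2, q_to_p));  // 9
    printf("%d\n", find_copy_via_scan(7, 2, p_not_owned));    // 1
    return 0;
}
```

The correspondence lookup is constant time, while the fallback scan is linear in the destination patch's element count.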
21 changes: 12 additions & 9 deletions include/rxmesh/patch_info.h
@@ -48,11 +48,11 @@ struct ALIGN(16) PatchInfo
faces_capacity(nullptr),
patch_id(INVALID32){};

__device__ __host__ PatchInfo(const PatchInfo& other) = default;
__device__ __host__ PatchInfo(PatchInfo&&) = default;
__device__ __host__ PatchInfo& operator=(const PatchInfo&) = default;
__device__ __host__ PatchInfo& operator=(PatchInfo&&) = default;
__device__ __host__ ~PatchInfo() = default;
__device__ __host__ PatchInfo(const PatchInfo& other) = default;
__device__ __host__ PatchInfo(PatchInfo&&) = default;
__device__ __host__ PatchInfo& operator=(const PatchInfo&) = default;
__device__ __host__ PatchInfo& operator=(PatchInfo&&) = default;
__device__ __host__ ~PatchInfo() = default;

// The topology information: edge incident vertices and face incident edges
LocalVertexT* ev;
@@ -145,17 +145,20 @@ struct ALIGN(16) PatchInfo
}

template <typename HandleT>
__device__ __host__ __inline__ HandleT find(const LPPair::KeyT key) const
__device__ __host__ __inline__ HandleT find(
const LPPair::KeyT key,
const LPPair* table = nullptr,
const LPPair* stash = nullptr) const
{
LPPair lp;
if constexpr (std::is_same_v<HandleT, VertexHandle>) {
lp = lp_v.find(key, nullptr, nullptr);
lp = lp_v.find(key, table, stash);
}
if constexpr (std::is_same_v<HandleT, EdgeHandle>) {
lp = lp_e.find(key, nullptr, nullptr);
lp = lp_e.find(key, table, stash);
}
if constexpr (std::is_same_v<HandleT, FaceHandle>) {
lp = lp_f.find(key, nullptr, nullptr);
lp = lp_f.find(key, table, stash);
}

// assert(!lp.is_sentinel());
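`PatchInfo::find` now takes optional `table` / `stash` pointers that default to `nullptr` and are forwarded to `lp_v` / `lp_e` / `lp_f.find()`, so existing call sites keep the previous (global-memory) behavior while a caller holding a shared-memory copy of the table can pass it in. A small host-side sketch of the same defaulted-pointer pattern, with made-up stand-in types and the stash omitted:

```cpp
// Illustrative stand-ins only: 'Table' mimics the idea of a find() that can
// read from an optional alternative backing array, and 'Info' mimics
// PatchInfo::find forwarding a defaulted pointer.
#include <cstdint>
#include <cstdio>
#include <vector>

struct Entry {
    uint32_t key;
    uint32_t value;
};

struct Table {
    std::vector<Entry> global_store;  // stands in for the table's own storage

    // if 'alt' is non-null, look there instead of the table's own storage
    uint32_t find(uint32_t key, const Entry* alt, uint32_t alt_size) const
    {
        const Entry* src  = alt ? alt : global_store.data();
        uint32_t     size = alt ? alt_size : uint32_t(global_store.size());
        for (uint32_t i = 0; i < size; ++i) {
            if (src[i].key == key) {
                return src[i].value;
            }
        }
        return 0xFFFFFFFFu;
    }
};

struct Info {
    Table lp;

    // defaulted pointer keeps old call sites, info.find(k), unchanged
    uint32_t find(uint32_t key,
                  const Entry* table      = nullptr,
                  uint32_t     table_size = 0) const
    {
        return lp.find(key, table, table_size);
    }
};

int main()
{
    Info  info{Table{{{7, 70}, {8, 80}}}};
    Entry shared_copy[] = {{7, 700}};  // pretend this lives in shared memory

    printf("%u\n", info.find(7));                  // 70: global path (old behavior)
    printf("%u\n", info.find(7, shared_copy, 1));  // 700: caller-supplied copy
    return 0;
}
```

Calling `info.find(k)` reproduces the old path, while `info.find(k, shared_copy, 1)` answers from the caller-supplied buffer; in the actual change, the commented-out shared-memory staging in `cavity_manager_impl.cuh` would be the source of such a buffer.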
2 changes: 1 addition & 1 deletion include/rxmesh/patch_scheduler.cuh
@@ -2,7 +2,7 @@

// for debugging, this macro lets the scheduler generate only one valid patch
// (corresponding to blockIdx.x)
//#define PROCESS_SINGLE_PATCH
// #define PROCESS_SINGLE_PATCH

// inspired by / taken from
// https://github.com/GPUPeople/Ouroboros/blob/9153c55abffb3bceb5aea4028dfcc00439b046d5/include/device/queues/Queue.h
15 changes: 14 additions & 1 deletion include/rxmesh/rxmesh.cpp
@@ -791,7 +791,8 @@ void RXMesh::build_device()
m_d_patches_info[p]);
}

for (int p = 0; p < static_cast<int>(get_num_patches()); ++p) {

for (uint32_t p = 0; p < get_num_patches(); ++p) {
m_max_capacity_lp_v = std::max(m_max_capacity_lp_v,
m_h_patches_info[p].lp_v.get_capacity());

@@ -1175,5 +1176,17 @@ void RXMesh::allocate_extra_patches()
m_h_patches_info[p],
m_d_patches_info[p]);
}


for (uint32_t p = get_num_patches(); p < get_max_num_patches(); ++p) {
m_max_capacity_lp_v = std::max(m_max_capacity_lp_v,
m_h_patches_info[p].lp_v.get_capacity());

m_max_capacity_lp_e = std::max(m_max_capacity_lp_e,
m_h_patches_info[p].lp_e.get_capacity());

m_max_capacity_lp_f = std::max(m_max_capacity_lp_f,
m_h_patches_info[p].lp_f.get_capacity());
}
}
} // namespace rxmesh
6 changes: 6 additions & 0 deletions include/rxmesh/rxmesh_dynamic.h
@@ -453,6 +453,12 @@ class RXMeshDynamic : public RXMeshStatic
cavity_size_shmem += (this->m_max_faces_per_patch / 2) * sizeof(int) +
ShmemAllocator::default_alignment;

size_t q_lp_shmem = std::max(max_lp_hashtable_capacity<LocalVertexT>(),
max_lp_hashtable_capacity<LocalEdgeT>());

q_lp_shmem = std::max(q_lp_shmem,
size_t(max_lp_hashtable_capacity<LocalFaceT>())) *
sizeof(LPPair);

// active, owned, migrate(for vertices only), src bitmask (for vertices
// and edges only), src connect (for vertices and edges only), ownership
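The added request sizes one buffer large enough for whichever of the three LP hash tables (vertex, edge, or face) has the largest capacity, then converts elements to bytes with `sizeof(LPPair)`. The snippet below only mirrors that max-then-multiply arithmetic; the capacities are invented and the 8-byte `LPPair` size is an assumption, since the real values come from `max_lp_hashtable_capacity<>()`.

```cpp
// Illustration of the q_lp_shmem sizing with made-up capacities; only the
// max-then-multiply arithmetic mirrors the change above.
#include <algorithm>
#include <cstddef>
#include <cstdio>

int main()
{
    const size_t cap_v      = 896;   // hypothetical vertex LP table capacity
    const size_t cap_e      = 1664;  // hypothetical edge LP table capacity
    const size_t cap_f      = 1024;  // hypothetical face LP table capacity
    const size_t pair_bytes = 8;     // assumed sizeof(LPPair)

    const size_t q_lp_shmem = std::max({cap_v, cap_e, cap_f}) * pair_bytes;
    printf("extra shared memory per block: %zu bytes\n", q_lp_shmem);  // 13312
    return 0;
}
```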
4 changes: 0 additions & 4 deletions include/rxmesh/util/macros.h
@@ -43,11 +43,7 @@ inline void HandleError(cudaError_t err, const char* file, int line)
if (err != cudaSuccess) {
Log::get_logger()->error("Line {} File {}", line, file);
Log::get_logger()->error("CUDA ERROR: {}", cudaGetErrorString(err));
#ifdef _WIN32
system("pause");
#else
exit(EXIT_FAILURE);
#endif
}
}
#define CUDA_ERROR(err) (HandleError(err, __FILE__, __LINE__))
