[obsolete?] Fractal reconstruction #480

Draft · wants to merge 70 commits into base: master

Changes from all commits (70 commits)
c5a37c0
sgs_fra solver declaration
pdziekan Feb 28, 2022
f915875
wip on fractal refinement
pdziekan Mar 1, 2022
c1b497d
working allocation of refined arrays
pdziekan Mar 2, 2022
487b356
refined grid size dependent on the number of fractal reconstructions
pdziekan Mar 2, 2022
1ad25f3
fix refine_grid_size()
pdziekan Mar 2, 2022
f3885aa
mem_factory function
pdziekan Mar 2, 2022
47348d7
conditional choice of shmem / shmem_refined
pdziekan Mar 2, 2022
a6e8b43
conditional choice of shmem / shmem_refined - upgraded
pdziekan Mar 3, 2022
be93f45
working shmem / shmem_recon stuff? (openmp and 3d only for now)
pdziekan Mar 3, 2022
963be45
solver_family protected again + solver doesn't inherit from family tag
pdziekan Mar 3, 2022
d361919
use mem_factory in all concurr types
pdziekan Mar 3, 2022
2d956d1
second ctor for shmem_refined in mem_t in each concurr type
pdziekan Mar 3, 2022
7d54514
alloc tmp with custom grid size in 1d and 2d
pdziekan Mar 3, 2022
3f81179
hdf5: store refined data
pdziekan Mar 7, 2022
71f4d70
xmf: record refined aux
pdziekan Mar 7, 2022
0fe0730
hdf5: store refined X,Y,Z TODO: make it conditional
pdziekan Mar 7, 2022
caaffb4
xmf writer: add reference writing
pdziekan Mar 7, 2022
1beeef1
xmf: write refined data using separate xmf_writer
pdziekan Mar 7, 2022
cb7d541
Merge branch 'master' of github.com:igfuw/libmpdataxx into fractal
pdziekan Mar 15, 2022
54e392b
fix grid size refinement strategy
pdziekan Mar 16, 2022
fb08f52
wip on refinement: fixed positions in output, added refined ijk
pdziekan Mar 16, 2022
e9984b5
refined fields without halos
pdziekan Mar 17, 2022
3f540a9
refined grid: halo of n_ref/2
pdziekan Mar 17, 2022
87df456
Revert "refined grid: halo of n_ref/2"
pdziekan Mar 17, 2022
aba74b3
wip on refined interpolation
pdziekan Mar 17, 2022
6aab97c
wip on interpolation
pdziekan Mar 18, 2022
e622df1
interpolation: hardcode two iterations, still no interpolation at sh…
pdziekan Mar 18, 2022
669194a
wip on interpolation at edges
pdziekan Mar 21, 2022
fe9d8f8
shmem_ref: require grid size that makes fractal reconstruction easier
pdziekan Mar 31, 2022
275789c
refinement: wip
pdziekan Mar 31, 2022
e5213f1
refinement: fix interpolation for distmem!
pdziekan Apr 1, 2022
5225da2
working interpolation of scalars
pdziekan Apr 19, 2022
c7eb23a
pbl: refine u too
pdziekan Apr 19, 2022
b0fa3f3
fra rec wip
pdziekan Apr 20, 2022
94af7cb
fra rec in 3 steps:
pdziekan Apr 20, 2022
2701722
fra rec wip
pdziekan Apr 20, 2022
93294f2
fra rec wip
pdziekan Apr 21, 2022
9866e7c
fra rec wip
pdziekan Apr 25, 2022
ab01ef0
new ref ranges working interpolation
pdziekan Apr 25, 2022
c620de4
cleanup
pdziekan Apr 25, 2022
eeaba3f
cleanup + wip on reconstruction
pdziekan Apr 25, 2022
34337eb
working reconstruction? at least for 1 iteration
pdziekan Apr 26, 2022
6c06aed
working recon, at least 1 iter
pdziekan Apr 26, 2022
42c26ac
working recon
pdziekan Apr 27, 2022
72ead36
reconstruction and interpolation: move similar code to functions
pdziekan Apr 27, 2022
12766ed
random stretching parameter
pdziekan Apr 28, 2022
549a592
move fractal reconstruction formulas to formulae/
pdziekan Apr 29, 2022
0fd42ec
Revert "move fractal reconstruction formulas to formulae/"
pdziekan Apr 29, 2022
59b3bab
Revert "Revert "move fractal reconstruction formulas to formulae/""
pdziekan Apr 29, 2022
a788a4d
pbl smg: set fra iter
pdziekan Apr 29, 2022
34ad9a5
pbl smg: fra iter 4
pdziekan Apr 29, 2022
be4a6a3
alloc refined arrays only for required advectees
pdziekan May 4, 2022
6e5ba14
grid refinement: refined boundary in the middle between mpi domains
pdziekan May 9, 2022
4275cea
cleanup
pdziekan May 9, 2022
079a75e
comments
pdziekan Jun 6, 2022
36452f9
include <random> in fractal reconstruction
pdziekan Sep 27, 2022
10a335d
stop on first compilation error in Debug mode
pdziekan Sep 27, 2022
60cd21f
sgs fra make typedefs public
pdziekan Sep 27, 2022
e5e4879
hdf5 output: handle sgs_fra family tag
pdziekan Sep 28, 2022
2c532ec
domain decomposition functions static
pdziekan Sep 28, 2022
eee8135
add hook_ante_record_all
pdziekan Sep 28, 2022
5015952
hook_pre_record_all -> hook_ante_record_all
pdziekan Sep 28, 2022
0194299
barrier in output common
pdziekan Sep 28, 2022
bbe52f9
interpolate ref: don't calc stretching params
pdziekan Sep 30, 2022
c9b3b62
record prof refined
pdziekan Oct 3, 2022
be08975
comment
pdziekan Oct 5, 2022
815377c
formulae for averaging from refined to regular grid
pdziekan Oct 5, 2022
a7e4a9f
formulae for averaging from refined to regular grid: working but with…
pdziekan Oct 5, 2022
9217fd4
averaging from refined to regular: fixes for edge cells
pdziekan Oct 18, 2022
04ce94d
hdf output: record_aux_refined
pdziekan Oct 19, 2022
6 changes: 3 additions & 3 deletions libmpdata++-config.cmake
@@ -32,7 +32,7 @@ set(libmpdataxx_INCLUDE_DIRS "${CMAKE_CURRENT_LIST_DIR}/../../include/")

############################################################################################
# debug mode compiler flags
-set(libmpdataxx_CXX_FLAGS_DEBUG "${libmpdataxx_CXX_FLAGS_DEBUG} -std=c++14 -DBZ_DEBUG -g -Wno-enum-compare") #TODO: -Og if compiler supports it?
+set(libmpdataxx_CXX_FLAGS_DEBUG "${libmpdataxx_CXX_FLAGS_DEBUG} -std=c++17 -DBZ_DEBUG -g -Wno-enum-compare -Wfatal-errors") #TODO: -Og if compiler supports it?


############################################################################################
@@ -42,7 +42,7 @@ if(
CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR
CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"
)
-set(libmpdataxx_CXX_FLAGS_RELEASE "${libmpdataxx_CXX_FLAGS_RELEASE} -std=c++14 -DNDEBUG -Ofast -march=native -Wno-enum-compare")
+set(libmpdataxx_CXX_FLAGS_RELEASE "${libmpdataxx_CXX_FLAGS_RELEASE} -std=c++17 -DNDEBUG -Ofast -march=native -Wno-enum-compare")

# preventing Kahan summation from being optimised out
if (
@@ -58,7 +58,7 @@ if(
CMAKE_CXX_COMPILER_ID STREQUAL "Intel"
)
# flags taken from -fast but without -static
-set(libmpdataxx_CXX_FLAGS_RELEASE "${libmpdataxx_CXX_FLAGS_RELEASE} -std=gnu++14 -DNDEBUG -xHOST -O3 -ipo -no-prec-div -fp-model fast=2")
+set(libmpdataxx_CXX_FLAGS_RELEASE "${libmpdataxx_CXX_FLAGS_RELEASE} -std=gnu++17 -DNDEBUG -xHOST -O3 -ipo -no-prec-div -fp-model fast=2")
endif()


1 change: 1 addition & 0 deletions libmpdata++/blitz.hpp
@@ -51,6 +51,7 @@
namespace libmpdataxx
{
template <int n_dims> using idx_t = blitz::RectDomain<n_dims>;
+template <int n_dims> using idxs_t = blitz::StridedDomain<n_dims>;
using rng_t = blitz::Range;

// non-int ix_t means either rng_t or idx_t
8 changes: 3 additions & 5 deletions libmpdata++/concurr/boost_thread.hpp
@@ -51,10 +51,8 @@ namespace libmpdataxx


// ctor
-mem_t(const std::array<int, solver_t::n_dims> &grid_size) :
-b(size(grid_size[0])),
-parent_t::mem_t(grid_size, size(grid_size[0]))
-{};
+mem_t(const std::array<int, solver_t::n_dims> &grid_size, const int n_ref) : b(size(grid_size[0])), parent_t::mem_t(grid_size, size(grid_size[0]), n_ref) {};
+mem_t(const std::array<int, solver_t::n_dims> &grid_size) : b(size(grid_size[0])), parent_t::mem_t(grid_size, size(grid_size[0])) {};

void barrier()
{
@@ -81,7 +79,7 @@

// ctor
boost_thread(const typename solver_t::rt_params_t &p) :
-parent_t(p, new mem_t(p.grid_size), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction
+parent_t(p, detail::mem_factory<mem_t, solver_t>(p), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction
{}

};
8 changes: 3 additions & 5 deletions libmpdata++/concurr/cxx11_thread.hpp
@@ -92,10 +92,8 @@ namespace libmpdataxx
}

// ctor
-mem_t(const std::array<int, solver_t::n_dims> &grid_size) :
-b(size(grid_size[0])),
-parent_t::mem_t(grid_size, size(grid_size[0]))
-{};
+mem_t(const std::array<int, solver_t::n_dims> &grid_size, const int n_ref) : b(size(grid_size[0])), parent_t::mem_t(grid_size, size(grid_size[0]), n_ref) {};
+mem_t(const std::array<int, solver_t::n_dims> &grid_size) : b(size(grid_size[0])), parent_t::mem_t(grid_size, size(grid_size[0])) {};

void barrier()
{
@@ -119,7 +117,7 @@

// ctor
cxx11_thread(const typename solver_t::rt_params_t &p) :
-parent_t(p, new mem_t(p.grid_size), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction
+parent_t(p, detail::mem_factory<mem_t, solver_t>(p), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction
{}

};
20 changes: 13 additions & 7 deletions libmpdata++/concurr/detail/concurr_common.hpp
@@ -14,7 +14,7 @@
#include <boost/ptr_container/ptr_vector.hpp>
#include <libmpdata++/blitz.hpp>

-#include <libmpdata++/concurr/detail/sharedmem.hpp>
+#include <libmpdata++/concurr/detail/sharedmem_refined.hpp>
#include <libmpdata++/concurr/detail/timer.hpp>
#include <libmpdata++/concurr/any.hpp>

@@ -34,6 +34,8 @@
#include <libmpdata++/bcond/remote_3d.hpp>
#include <libmpdata++/bcond/gndsky_3d.hpp>

+#include <libmpdata++/solvers/detail/solver_type_traits.hpp>

namespace libmpdataxx
{
namespace concurr
@@ -149,12 +151,7 @@

protected:

-// (cannot be nested due to templates)
-typedef sharedmem<
-typename solver_t::real_t,
-solver_t::n_dims,
-solver_t::n_tlev
-> mem_t;
+using mem_t = typename solver_t::mem_t;

// member fields
boost::ptr_vector<solver_t> algos;
@@ -452,6 +449,15 @@
return mem->max(mem->advectee(e));
}
};

+template< class mem_t, class solver_t>
+mem_t* mem_factory(const typename solver_t::rt_params_t &p)
+{
+if constexpr (solvers::detail::slvr_with_frac_recn<typename solver_t::ct_params_t_>())
+return new mem_t(p.grid_size, pow(2, p.n_fra_iter));
+else
+return new mem_t(p.grid_size);
+}
} // namespace detail
} // namespace concurr
} // namespace libmpdataxx
1 change: 1 addition & 0 deletions libmpdata++/concurr/detail/distmem.hpp
@@ -54,6 +54,7 @@ namespace libmpdataxx
public:

std::array<int, n_dims> grid_size;
+std::array<int, n_dims> grid_size_ref;

int rank()
{
7 changes: 4 additions & 3 deletions libmpdata++/concurr/detail/sharedmem.hpp
@@ -97,8 +97,6 @@ namespace libmpdataxx
rng_t(0, grid_size[d]-1),
d == 0 ? distmem.rank() : 0, // decomposition along x, because that's MPI decomposition
d == 0 ? distmem.size() : 1
-// d == shmem_decomp_dim ? distmem.rank() : 0,
-// d == shmem_decomp_dim ? distmem.size() : 1
);
origin[d] = this->grid_size[d].first();
}
@@ -294,6 +292,7 @@
}

virtual arr_t advectee(int e = 0) = 0;
+virtual const arr_t advectee_global(int e = 0) = 0;

void advectee_global_set(const arr_t arr, int e = 0)
{
@@ -507,9 +506,11 @@
class sharedmem<real_t, 3, n_tlev> : public sharedmem_common<real_t, 3, n_tlev>
{
using parent_t = sharedmem_common<real_t, 3, n_tlev>;
-using arr_t = typename parent_t::arr_t;
using parent_t::parent_t; // inheriting ctors

+protected:
+using arr_t = typename parent_t::arr_t;

public:

virtual arr_t *never_delete(arr_t *arg) override
122 changes: 122 additions & 0 deletions libmpdata++/concurr/detail/sharedmem_refined.hpp
@@ -0,0 +1,122 @@
/** @file
* @copyright University of Warsaw
* @section LICENSE
* GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
*/

// memory management with (fractal) grid refinement

#pragma once

#include "sharedmem.hpp"

namespace libmpdataxx
{
namespace concurr
{
namespace detail
{
template <
typename real_t,
int n_dims,
int n_tlev
>
class sharedmem_refined_common : public sharedmem<real_t, n_dims, n_tlev>
{
using parent_t = sharedmem<real_t, n_dims, n_tlev>;

protected:

using arr_t = typename parent_t::arr_t;

blitz::TinyVector<int, n_dims> origin_ref;

public:

const int n_ref; // number of equal divisions of the large cell (in each direction), refined resolution is dx / n_ref;
// every n_ref-th scalar of the refined grid is at the same position as a scalar of the normal grid
// no refinement done in the halo, because there are no SD in the halo (it's not real space)
// what about MPI boundaries? there are refined points exactly at the boundary (since n_ref has to be even)
// note: if there is a refined cell that is divided by the MPI boundary, do we need to add contributions from microphysics to both processes on both sides?
// maybe not, because microphysics contributions will affect the large cells, which are not divided by the MPI boundary...

std::array<rng_t, n_dims> grid_size_ref;
// TODO: these are public because used from outside in alloc - could friendship help?
//arrvec_t<arr_t> GC_ref, psi_ref;
arrvec_t<arr_t> psi_ref;

// ctors
sharedmem_refined_common(const std::array<int, n_dims> &grid_size, const int &size, const int &n_ref)
: parent_t(grid_size, size), n_ref(n_ref)
{
assert(n_ref % 2 == 0); // only division into even number of cells, because we assume that one of the refined scalar points is at the MPI boundary, which is in the middle between normal grid scalars

// for now, require a grid_size that is convenient for fractal reconstruction (which calculates 2 points based on 3 points)
// NOTE: fix this with proper halos (cyclic is easy, but what about rigid?)
// NOTE2: this is actually a requirement for fractal reconstruction, not for any grid refinement, so move this somewhere else
for (int d = 0; d < n_dims; ++d)
if((grid_size[d] - 3) % 2 != 0) throw std::runtime_error("Fractal grid refinement requires nx/ny/nz = 3 + 2 * i, where i = 0,1,2,3,...");

for (int d = 0; d < n_dims; ++d)
{
grid_size_ref[d] = refine_grid_size(
this->grid_size[d],
n_ref,
d == 0 ? this->distmem.rank() : 0,
d == 0 ? this->distmem.size() : 1
);
origin_ref[d] = grid_size_ref[d].first();

this->distmem.grid_size_ref[d] = refine_grid_size(rng_t(0,grid_size[d]-1), n_ref, 0, 1).length();
}
}

// NOTE: not all advectees are refined, so e (numbering) in refinee is different than in advectee
virtual arr_t refinee(int e = 0) = 0;
// virtual const arr_t refinee_global_ref(int e = 0) = 0;

public:
static rng_t refine_grid_size(
const rng_t &grid_size,
const int &n_ref,
const int &mpi_rank,
const int &mpi_size
) {
assert(n_ref % 2 == 0);
// NOTE: overlapping points in between MPI domains
return rng_t(
mpi_rank == 0 ? grid_size.first() * n_ref : grid_size.first() * n_ref - n_ref / 2,
mpi_rank == mpi_size-1 ? grid_size.last() * n_ref : grid_size.last() * n_ref + n_ref / 2 // refined points between MPI domains are evenly divided between MPI processes
);
}
};



template<typename real_t, int n_dims, int n_tlev>
class sharedmem_refined
{};

template<typename real_t, int n_tlev>
class sharedmem_refined<real_t, 3, n_tlev> : public sharedmem_refined_common<real_t, 3, n_tlev>
{
using parent_t = sharedmem_refined_common<real_t, 3, n_tlev>;
using parent_t::parent_t; // inheriting ctors

protected:
using arr_t = typename parent_t::arr_t;

public:
arr_t refinee(int e = 0) override
{
return this->psi_ref[e](
this->grid_size_ref[0],
this->grid_size_ref[1],
this->grid_size_ref[2]
).reindex(this->origin_ref);
}
};

} // namespace detail
} // namespace concurr
} // namespace libmpdataxx
7 changes: 3 additions & 4 deletions libmpdata++/concurr/openmp.hpp
@@ -31,7 +31,6 @@ namespace libmpdataxx
{
using parent_t = detail::concurr_common<solver_t, bcxl, bcxr, bcyl, bcyr, bczl, bczr>;


struct mem_t : parent_t::mem_t
{
static int size(const unsigned max_threads = std::numeric_limits<unsigned>::max())
@@ -58,7 +57,8 @@
}

// ctors
-mem_t(const std::array<int, solver_t::n_dims> &grid_size) : parent_t::mem_t(grid_size, size(grid_size[0])) {};
+mem_t(const std::array<int, solver_t::n_dims> &grid_size, const int n_ref) : parent_t::mem_t(grid_size, size(grid_size[0]), n_ref) {};
+mem_t(const std::array<int, solver_t::n_dims> &grid_size) : parent_t::mem_t(grid_size, size(grid_size[0])) {};
};

void solve(typename parent_t::advance_arg_t nt)
@@ -75,9 +75,8 @@

public:

-// ctor
openmp(const typename solver_t::rt_params_t &p) :
-parent_t(p, new mem_t(p.grid_size), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction
+parent_t(p, detail::mem_factory<mem_t, solver_t>(p), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction
{}

};
7 changes: 3 additions & 4 deletions libmpdata++/concurr/serial.hpp
@@ -33,9 +33,8 @@ namespace libmpdataxx
void barrier() { }

// ctors
-mem_t(const std::array<int, solver_t::n_dims> &grid_size)
-: parent_t::mem_t(grid_size, size())
-{};
+mem_t(const std::array<int, solver_t::n_dims> &grid_size, const int n_ref) : parent_t::mem_t(grid_size, size(grid_size[0]), n_ref) {};
+mem_t(const std::array<int, solver_t::n_dims> &grid_size) : parent_t::mem_t(grid_size, size(grid_size[0])) {};
};

void solve(typename parent_t::advance_arg_t nt)
@@ -47,7 +46,7 @@

// ctor
serial(const typename solver_t::rt_params_t &p) :
-parent_t(p, new mem_t(p.grid_size), mem_t::size())
+parent_t(p, detail::mem_factory<mem_t, solver_t>(p), mem_t::size())
{}

};