From f3fd13059cc7b7f10a5829c655f5fd927a5cb7ff Mon Sep 17 00:00:00 2001 From: Amanda Bienz Date: Thu, 11 Jul 2024 15:11:30 -0600 Subject: [PATCH] Split into RAPtor and RAPtor-sparse --- .gitmodules | 6 + CMakeLists.txt | 52 +- config/raptor-sparseConfig.cmake.in | 8 + raptor-sparse | 1 + raptor/CMakeLists.txt | 22 +- raptor/aggregation/aggregate.hpp | 3 +- raptor/aggregation/candidates.hpp | 3 +- raptor/aggregation/mis.hpp | 3 +- raptor/aggregation/par_aggregate.hpp | 3 +- raptor/aggregation/par_candidates.hpp | 3 +- raptor/aggregation/par_mis.hpp | 3 +- raptor/aggregation/par_prolongation.hpp | 4 +- raptor/aggregation/prolongation.hpp | 4 +- raptor/core/CMakeLists.txt | 47 - raptor/core/README.md | 1 - raptor/core/comm_data.cpp | 88 - raptor/core/comm_data.hpp | 1428 ------------- raptor/core/comm_mat.cpp | 810 -------- raptor/core/comm_pkg.cpp | 203 -- raptor/core/comm_pkg.hpp | 1841 ----------------- raptor/core/matrix.cpp | 1438 ------------- raptor/core/matrix.hpp | 1353 ------------ raptor/core/mpi_types.cpp | 331 --- raptor/core/mpi_types.hpp | 133 -- raptor/core/par_matrix.cpp | 1116 ---------- raptor/core/par_matrix.hpp | 852 -------- raptor/core/par_vector.cpp | 123 -- raptor/core/par_vector.hpp | 179 -- raptor/core/partition.hpp | 349 ---- raptor/core/tap_comm.cpp | 1158 ----------- raptor/core/tests/CMakeLists.txt | 56 - raptor/core/tests/test_block_matrix.cpp | 212 -- raptor/core/tests/test_bsr_matrix.cpp | 79 - raptor/core/tests/test_matrix.cpp | 64 - .../core/tests/test_par_block_conversion.cpp | 105 - raptor/core/tests/test_par_block_matrix.cpp | 136 -- raptor/core/tests/test_par_bsr.cpp | 148 -- raptor/core/tests/test_par_comm.cpp | 83 - raptor/core/tests/test_par_matrix.cpp | 90 - raptor/core/tests/test_par_transpose.cpp | 50 - raptor/core/tests/test_par_vector.cpp | 70 - raptor/core/tests/test_tap_comm.cpp | 81 - raptor/core/tests/test_transpose.cpp | 45 - raptor/core/topology.hpp | 174 -- raptor/core/types.hpp | 77 - raptor/core/utilities.hpp | 
211 -- raptor/core/vector.cpp | 167 -- raptor/core/vector.hpp | 224 -- raptor/gallery/CMakeLists.txt | 47 - raptor/gallery/diffusion.cpp | 83 - raptor/gallery/diffusion.hpp | 61 - raptor/gallery/laplacian27pt.cpp | 36 - raptor/gallery/laplacian27pt.hpp | 30 - raptor/gallery/matrix_IO.cpp | 112 - raptor/gallery/matrix_IO.hpp | 24 - raptor/gallery/matrix_market.cpp | 538 ----- raptor/gallery/matrix_market.hpp | 137 -- raptor/gallery/par_matrix_IO.cpp | 187 -- raptor/gallery/par_matrix_IO.hpp | 26 - raptor/gallery/par_matrix_market.cpp | 309 --- raptor/gallery/par_matrix_market.hpp | 23 - raptor/gallery/par_random.cpp | 30 - raptor/gallery/par_random.hpp | 19 - raptor/gallery/par_stencil.cpp | 228 -- raptor/gallery/par_stencil.hpp | 19 - raptor/gallery/random.cpp | 28 - raptor/gallery/random.hpp | 19 - raptor/gallery/stencil.cpp | 196 -- raptor/gallery/stencil.hpp | 20 - raptor/gallery/tests/CMakeLists.txt | 34 - raptor/gallery/tests/test_aniso.cpp | 58 - raptor/gallery/tests/test_laplacian.cpp | 54 - raptor/gallery/tests/test_matrix_market.cpp | 39 - raptor/gallery/tests/test_par_aniso.cpp | 99 - raptor/gallery/tests/test_par_laplacian.cpp | 98 - .../gallery/tests/test_par_matrix_market.cpp | 50 - raptor/gallery/tests/test_stencil.cpp | 94 - raptor/krylov/bicgstab.hpp | 4 +- raptor/krylov/cg.hpp | 4 +- raptor/krylov/par_bicgstab.hpp | 4 +- raptor/krylov/par_cg.hpp | 4 +- raptor/krylov/partial_inner.hpp | 3 +- raptor/multilevel/level.hpp | 4 +- raptor/multilevel/multilevel.hpp | 6 +- raptor/multilevel/par_level.hpp | 4 +- raptor/multilevel/par_multilevel.hpp | 6 +- .../CMakeLists 2.txt} | 0 raptor/precondition/CMakeLists.txt | 37 + .../par_diag_scale.cpp | 0 .../par_diag_scale.hpp | 3 +- .../linalg => precondition}/par_relax.cpp | 2 - .../linalg => precondition}/par_relax.hpp | 3 +- .../{util/linalg => precondition}/relax.cpp | 4 +- .../{util/linalg => precondition}/relax.hpp | 3 +- raptor/precondition/tests/CMakeLists.txt | 53 + raptor/{core => 
precondition}/tests/README.md | 0 .../tests/test_bsr_gs_aniso.cpp | 0 .../tests/test_bsr_gs_laplacian.cpp | 0 .../tests/test_bsr_jacobi_aniso.cpp | 0 .../tests/test_bsr_jacobi_laplacian.cpp | 0 .../tests/test_bsr_spmv_aniso.cpp | 0 .../tests/test_bsr_spmv_laplacian.cpp | 0 .../tests/test_bsr_spmv_random.cpp | 0 .../tests/test_gs_aniso.cpp | 0 .../tests/test_gs_laplacian.cpp | 0 .../tests/test_jacobi_aniso.cpp | 0 .../tests/test_jacobi_laplacian.cpp | 0 .../tests/test_par_add.cpp | 0 .../tests/test_par_scale_aniso.cpp | 0 .../tests/test_par_spmv_aniso.cpp | 0 .../tests/test_par_spmv_laplacian.cpp | 0 .../tests/test_par_spmv_random.cpp | 0 .../tests/test_parmetis.cpp | 0 .../tests/test_ptscotch.cpp | 0 .../tests/test_repartition.cpp | 0 .../tests/test_sor_aniso.cpp | 0 .../tests/test_sor_laplacian.cpp | 0 .../tests/test_spmv_aniso.cpp | 0 .../tests/test_spmv_laplacian.cpp | 0 .../tests/test_spmv_random.cpp | 0 .../tests/test_tap_spmv_aniso.cpp | 0 .../tests/test_tap_spmv_laplacian.cpp | 0 .../tests/test_tap_spmv_random.cpp | 0 raptor/profiling/profile_comm.cpp | 2 +- raptor/raptor.hpp | 60 +- raptor/ruge_stuben/cf_splitting.hpp | 3 +- raptor/ruge_stuben/interpolation.cpp | 1 - raptor/ruge_stuben/interpolation.hpp | 3 +- raptor/ruge_stuben/par_cf_splitting.hpp | 3 +- raptor/ruge_stuben/par_interpolation.cpp | 3 +- raptor/ruge_stuben/par_interpolation.hpp | 3 +- raptor/tests/compare.hpp | 3 +- raptor/tests/par_compare.hpp | 3 +- raptor/util/linalg/CMakeLists.txt | 46 - raptor/util/linalg/add.cpp | 114 - raptor/util/linalg/external/CMakeLists.txt | 34 - .../util/linalg/external/parmetis_wrapper.hpp | 112 - .../util/linalg/external/ptscotch_wrapper.hpp | 105 - raptor/util/linalg/matmult.cpp | 352 ---- raptor/util/linalg/par_add.cpp | 309 --- raptor/util/linalg/par_matmult.cpp | 563 ----- raptor/util/linalg/par_spmv.cpp | 342 --- raptor/util/linalg/repartition.cpp | 392 ---- raptor/util/linalg/repartition.hpp | 22 - raptor/util/linalg/spmv.cpp | 437 ---- 
raptor/util/tests/CMakeLists.txt | 155 -- raptor/util/tests/README.md | 1 - 147 files changed, 162 insertions(+), 19385 deletions(-) create mode 100644 .gitmodules create mode 100644 config/raptor-sparseConfig.cmake.in create mode 160000 raptor-sparse delete mode 100644 raptor/core/CMakeLists.txt delete mode 100644 raptor/core/README.md delete mode 100644 raptor/core/comm_data.cpp delete mode 100644 raptor/core/comm_data.hpp delete mode 100644 raptor/core/comm_mat.cpp delete mode 100644 raptor/core/comm_pkg.cpp delete mode 100644 raptor/core/comm_pkg.hpp delete mode 100644 raptor/core/matrix.cpp delete mode 100644 raptor/core/matrix.hpp delete mode 100644 raptor/core/mpi_types.cpp delete mode 100644 raptor/core/mpi_types.hpp delete mode 100644 raptor/core/par_matrix.cpp delete mode 100644 raptor/core/par_matrix.hpp delete mode 100644 raptor/core/par_vector.cpp delete mode 100644 raptor/core/par_vector.hpp delete mode 100644 raptor/core/partition.hpp delete mode 100644 raptor/core/tap_comm.cpp delete mode 100644 raptor/core/tests/CMakeLists.txt delete mode 100644 raptor/core/tests/test_block_matrix.cpp delete mode 100644 raptor/core/tests/test_bsr_matrix.cpp delete mode 100644 raptor/core/tests/test_matrix.cpp delete mode 100644 raptor/core/tests/test_par_block_conversion.cpp delete mode 100644 raptor/core/tests/test_par_block_matrix.cpp delete mode 100644 raptor/core/tests/test_par_bsr.cpp delete mode 100644 raptor/core/tests/test_par_comm.cpp delete mode 100644 raptor/core/tests/test_par_matrix.cpp delete mode 100644 raptor/core/tests/test_par_transpose.cpp delete mode 100644 raptor/core/tests/test_par_vector.cpp delete mode 100644 raptor/core/tests/test_tap_comm.cpp delete mode 100644 raptor/core/tests/test_transpose.cpp delete mode 100644 raptor/core/topology.hpp delete mode 100644 raptor/core/types.hpp delete mode 100644 raptor/core/utilities.hpp delete mode 100644 raptor/core/vector.cpp delete mode 100644 raptor/core/vector.hpp delete mode 100644 
raptor/gallery/CMakeLists.txt delete mode 100644 raptor/gallery/diffusion.cpp delete mode 100644 raptor/gallery/diffusion.hpp delete mode 100644 raptor/gallery/laplacian27pt.cpp delete mode 100644 raptor/gallery/laplacian27pt.hpp delete mode 100644 raptor/gallery/matrix_IO.cpp delete mode 100644 raptor/gallery/matrix_IO.hpp delete mode 100644 raptor/gallery/matrix_market.cpp delete mode 100644 raptor/gallery/matrix_market.hpp delete mode 100644 raptor/gallery/par_matrix_IO.cpp delete mode 100644 raptor/gallery/par_matrix_IO.hpp delete mode 100644 raptor/gallery/par_matrix_market.cpp delete mode 100644 raptor/gallery/par_matrix_market.hpp delete mode 100644 raptor/gallery/par_random.cpp delete mode 100644 raptor/gallery/par_random.hpp delete mode 100644 raptor/gallery/par_stencil.cpp delete mode 100644 raptor/gallery/par_stencil.hpp delete mode 100644 raptor/gallery/random.cpp delete mode 100644 raptor/gallery/random.hpp delete mode 100644 raptor/gallery/stencil.cpp delete mode 100644 raptor/gallery/stencil.hpp delete mode 100644 raptor/gallery/tests/CMakeLists.txt delete mode 100644 raptor/gallery/tests/test_aniso.cpp delete mode 100644 raptor/gallery/tests/test_laplacian.cpp delete mode 100644 raptor/gallery/tests/test_matrix_market.cpp delete mode 100644 raptor/gallery/tests/test_par_aniso.cpp delete mode 100644 raptor/gallery/tests/test_par_laplacian.cpp delete mode 100644 raptor/gallery/tests/test_par_matrix_market.cpp delete mode 100644 raptor/gallery/tests/test_stencil.cpp rename raptor/{util/CMakeLists.txt => precondition/CMakeLists 2.txt} (100%) create mode 100644 raptor/precondition/CMakeLists.txt rename raptor/{util/linalg => precondition}/par_diag_scale.cpp (100%) rename raptor/{util/linalg => precondition}/par_diag_scale.hpp (86%) rename raptor/{util/linalg => precondition}/par_relax.cpp (99%) rename raptor/{util/linalg => precondition}/par_relax.hpp (90%) rename raptor/{util/linalg => precondition}/relax.cpp (98%) rename raptor/{util/linalg => 
precondition}/relax.hpp (94%) create mode 100644 raptor/precondition/tests/CMakeLists.txt rename raptor/{core => precondition}/tests/README.md (100%) rename raptor/{util => precondition}/tests/test_bsr_gs_aniso.cpp (100%) rename raptor/{util => precondition}/tests/test_bsr_gs_laplacian.cpp (100%) rename raptor/{util => precondition}/tests/test_bsr_jacobi_aniso.cpp (100%) rename raptor/{util => precondition}/tests/test_bsr_jacobi_laplacian.cpp (100%) rename raptor/{util => precondition}/tests/test_bsr_spmv_aniso.cpp (100%) rename raptor/{util => precondition}/tests/test_bsr_spmv_laplacian.cpp (100%) rename raptor/{util => precondition}/tests/test_bsr_spmv_random.cpp (100%) rename raptor/{util => precondition}/tests/test_gs_aniso.cpp (100%) rename raptor/{util => precondition}/tests/test_gs_laplacian.cpp (100%) rename raptor/{util => precondition}/tests/test_jacobi_aniso.cpp (100%) rename raptor/{util => precondition}/tests/test_jacobi_laplacian.cpp (100%) rename raptor/{util => precondition}/tests/test_par_add.cpp (100%) rename raptor/{util => precondition}/tests/test_par_scale_aniso.cpp (100%) rename raptor/{util => precondition}/tests/test_par_spmv_aniso.cpp (100%) rename raptor/{util => precondition}/tests/test_par_spmv_laplacian.cpp (100%) rename raptor/{util => precondition}/tests/test_par_spmv_random.cpp (100%) rename raptor/{util => precondition}/tests/test_parmetis.cpp (100%) rename raptor/{util => precondition}/tests/test_ptscotch.cpp (100%) rename raptor/{util => precondition}/tests/test_repartition.cpp (100%) rename raptor/{util => precondition}/tests/test_sor_aniso.cpp (100%) rename raptor/{util => precondition}/tests/test_sor_laplacian.cpp (100%) rename raptor/{util => precondition}/tests/test_spmv_aniso.cpp (100%) rename raptor/{util => precondition}/tests/test_spmv_laplacian.cpp (100%) rename raptor/{util => precondition}/tests/test_spmv_random.cpp (100%) rename raptor/{util => precondition}/tests/test_tap_spmv_aniso.cpp (100%) rename raptor/{util => 
precondition}/tests/test_tap_spmv_laplacian.cpp (100%) rename raptor/{util => precondition}/tests/test_tap_spmv_random.cpp (100%) delete mode 100644 raptor/util/linalg/CMakeLists.txt delete mode 100644 raptor/util/linalg/add.cpp delete mode 100644 raptor/util/linalg/external/CMakeLists.txt delete mode 100644 raptor/util/linalg/external/parmetis_wrapper.hpp delete mode 100644 raptor/util/linalg/external/ptscotch_wrapper.hpp delete mode 100644 raptor/util/linalg/matmult.cpp delete mode 100644 raptor/util/linalg/par_add.cpp delete mode 100644 raptor/util/linalg/par_matmult.cpp delete mode 100644 raptor/util/linalg/par_spmv.cpp delete mode 100644 raptor/util/linalg/repartition.cpp delete mode 100644 raptor/util/linalg/repartition.hpp delete mode 100644 raptor/util/linalg/spmv.cpp delete mode 100644 raptor/util/tests/CMakeLists.txt delete mode 100644 raptor/util/tests/README.md diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..eb8f8d57 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "external/googletest"] + path = external/googletest + url = https://github.com/google/googletest.git +[submodule "raptor-sparse"] + path = raptor-sparse + url = https://github.com/raptor-library/raptor-sparse.git diff --git a/CMakeLists.txt b/CMakeLists.txt index b292551a..027a1808 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,10 @@ enable_language(CXX) set(CMAKE_CXX_STANDARD 11) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wredundant-decls -Wcast-align -Wshadow") -#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto -funroll-loops") + + +add_compile_definitions(RAPTOR_SPARSE_TEST_FOLDER="${CMAKE_SOURCE_DIR}/raptor-sparse/src/tests/") +add_compile_definitions(RAPTOR_TEST_FOLDER="${CMAKE_SOURCE_DIR}/raptor/tests/") include(FeatureSummary) @@ -18,7 +21,6 @@ option(WITH_HYPRE "Add Hypre" OFF) option(WITH_MUELU "Add Trilinos MueLu" OFF) option(WITH_MFEM "Add MFEM" OFF) option(WITH_PETSC "Add Petsc" OFF) -option(WITH_AMPI "Using AMPI" OFF) 
option(WITH_MPI "Using MPI" ON) option(WITH_HOSTFILE "Use a Hostfile with MPI" OFF) @@ -26,7 +28,6 @@ add_feature_info(hypre WITH_HYPRE "Hypre preconditioner") add_feature_info(ml WITH_MUELU "Trilinos MueLu preconditioner") add_feature_info(mfem WITH_MFEM "MFEM matrix gallery") add_feature_info(petsc WITH_PETSC "Petsc Interface") -add_feature_info(ampi WITH_AMPI "Compile with AMPI") add_feature_info(crayxe CRAYXE "Compile on CrayXE") add_feature_info(bgq BGQ "Compile on BGQ") add_feature_info(ptscotch WITH_PTSCOTCH "Enable PTScotch Partitioning") @@ -45,8 +46,9 @@ if (WITH_MPI) SET(MPIRUN mpirun) endif (WITH_MPI) -#include_directories("external") set(raptor_INCDIR ${CMAKE_CURRENT_SOURCE_DIR}/raptor) +set(raptor_sparse_DIR raptor-sparse/src) +set(raptor_sparse_INCDIR ${CMAKE_CURRENT_SOURCE_DIR}/${raptor_sparse_DIR}) set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) SET(HYPRE_DIR "" CACHE STRING "Directory of HYPRE") @@ -58,37 +60,12 @@ SET(PETSC_DIR "" CACHE STRING "Directory of petsc") SET(PTSCOTCH_DIR "" CACHE STRING "Directory of Ptscotch") SET(PARMETIS_DIR "" CACHE STRING "Directory of ParMetis") SET(HOSTFILE "" CACHE STRING "Set Hostfile") +SET(BLAS_PATH "" CACHE STRING "/usr/linb/x86_64-linux-gnu") +SET(LAPACK_PATH "" CACHE STRING "/usr/linb/x86_64-linux-gnu") -if (CRAYXE) - set(EXTERNAL_LIBS "") - set(MPIRUN aprun) - set(CMAKE_AR "gcc-ar-4.7") - set(CMAKE_CXX_ARCHIVE_CREATE " qcs ") - set(CMAKE_CXX_ARCHIVE_FINISH true) -elseif (BGQ) - find_library(LAPACK_LIB NAMES liblapack lapack HINTS - "/soft/libraries/alcf/current/xl/LAPACK/lib") - find_library(BLAS_LIB NAMES libblas blas HINTS - "/soft/libraries/alcf/current/xl/BLAS/lib") - find_library(ESSL_LIB NAMES libesslbg esslbg HINTS - "/soft/libraries/essl/current/essl/5.1/lib64") - find_library(XLF_LIB NAMES libxlf90_r.a xlf90_r HINTS - "/soft/compilers/ibmcmp-may2016/xlf/bg/14.1/bglib64") - find_library(XLOPT_LIB NAMES libxlopt.a xlopt HINTS - "/soft/compilers/ibmcmp-may2016/xlf/bg/14.1/bglib64") - 
find_library(XLFMATH_LIB NAMES libxlfmath.a xlfmath HINTS - "/soft/compilers/ibmcmp-may2016/xlf/bg/14.1/bglib64") - find_library(XL_LIB NAMES libxl.a xl HINTS - "/soft/compilers/ibmcmp-may2016/xlf/bg/14.1/bglib64") - find_library(XLSMP_LIB NAMES libxlomp_ser.a xlomp_ser HINTS - "/soft/compilers/ibmcmp-may2016/xlsmp/bg/3.1/bglib64") - set(EXTERNAL_LIBS ${LAPACK_LIB} ${BLAS_LIB} ${ESSL_LIB} ${XLF_LIB} - ${XLOPT_LIB} ${XLFMATH_LIB} ${XL_LIB} ${XLSMP_LIB}) -else() - find_library(LAPACK_LIB NAMES liblapack.so.3 lapack HINTS "/usr/lib/x86_64-linux-gnu/") - find_library(BLAS_LIB NAMES libblas.so.3 blas HINTS "/usr/lib/x86_64-linux-gnu/") - set(EXTERNAL_LIBS ${LAPACK_LIB} ${BLAS_LIB}) -endif() +find_library(LAPACK_LIB NAMES liblapack.so.3 lapack HINTS ${LAPACK_PATH}) +find_library(BLAS_LIB NAMES libblas.so.3 blas HINTS ${BLAS_PATH}) +set(EXTERNAL_LIBS ${LAPACK_LIB} ${BLAS_LIB}) if (WITH_HOSTFILE) find_file (FILE_OF_HOST, ${HOSTFILE}) @@ -180,10 +157,9 @@ if(WITH_PETSC) endif(PETSC_FOUND) endif(WITH_PETSC) -if (WITH_AMPI) - add_definitions(-DUSE_AMPI) -endif(WITH_AMPI) - +include_directories(${CMAKE_SOURCE_DIR}/raptor-sparse) +include_directories(${CMAKE_SOURCE_DIR}/raptor-sparse/src) +add_subdirectory(raptor-sparse/src) add_subdirectory(raptor) if (BUILD_EXAMPLES) diff --git a/config/raptor-sparseConfig.cmake.in b/config/raptor-sparseConfig.cmake.in new file mode 100644 index 00000000..9cb5d2e7 --- /dev/null +++ b/config/raptor-sparseConfig.cmake.in @@ -0,0 +1,8 @@ +@PACKAGE_INIT@ + +if(NOT TARGET raptor-sparse AND NOT raptor-sparse_BINARY_DIR) + include("${CMAKE_CURRENT_LIST_DIR}/raptor-sparseTargets.cmake") + endif() + +find_package(Threads REQUIRED) +find_package(MPI COMPONENTS CXX REQUIRED) diff --git a/raptor-sparse b/raptor-sparse new file mode 160000 index 00000000..0463db14 --- /dev/null +++ b/raptor-sparse @@ -0,0 +1 @@ +Subproject commit 0463db14e9f99bac651217ba614e0f99a5bc2394 diff --git a/raptor/CMakeLists.txt b/raptor/CMakeLists.txt index 353ca2aa..bf01cfcd 
100644 --- a/raptor/CMakeLists.txt +++ b/raptor/CMakeLists.txt @@ -1,6 +1,4 @@ -add_subdirectory(core) -add_subdirectory(gallery) -add_subdirectory(util) +add_subdirectory(precondition) add_subdirectory(ruge_stuben) add_subdirectory(aggregation) add_subdirectory(multilevel) @@ -17,9 +15,10 @@ else() ) endif() -add_library(raptor ${core_SOURCES} ${core_HEADERS} - ${gallery_SOURCES} ${gallery_HEADERS} ${ext_gallery_HEADERS} - ${util_SOURCES} ${util_HEADERS} +message(STATUS ${sparse_SOURCES}) + +add_library(raptor + ${precond_SOURCES} ${precond_HEADERS} ${par_SOURCES} strength.cpp ${ruge_stuben_SOURCES} ${ruge_stuben_HEADERS} ${aggregation_SOURCES} ${aggregation_HEADERS} @@ -28,7 +27,7 @@ add_library(raptor ${core_SOURCES} ${core_HEADERS} ${profile_SOURCES} ${profile_HEADERS} ${external_SOURCES} ${external_HEADERS}) -target_link_libraries(raptor PUBLIC ${MPI_C_LIBRARIES} ${MFEM_LIBRARIES} ${METIS_LIBRARIES} ${HYPRE_LIBRARIES} +target_link_libraries(raptor PUBLIC raptor-sparse ${MPI_C_LIBRARIES} ${MFEM_LIBRARIES} ${METIS_LIBRARIES} ${HYPRE_LIBRARIES} ${MUELU_LIBRARIES} ${PETSC_LIBRARIES} ${PTSCOTCH_LIBRARIES} ${PARMETIS_LIBRARIES} ${EXTERNAL_LIBS}) target_include_directories(raptor @@ -42,11 +41,8 @@ install(TARGETS raptor EXPORT raptorTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}) install(FILES raptor.hpp DESTINATION "include/raptor") -install(FILES ${core_HEADERS} DESTINATION "include/raptor/core") -install(FILES ${gallery_HEADERS} DESTINATION "include/raptor/gallery") install(FILES ${ext_gallery_HEADERS} DESTINATION "include/raptor/gallery/external") -install(FILES ${util_HEADERS} DESTINATION "include/raptor/util/linalg") -install(FILES ${ext_util_HEADERS} DESTINATION "include/raptor/util/linalg/external") +install(FILES ${precond_HEADERS} DESTINATION "include/raptor/precondition") install(FILES ${ruge_stuben_HEADERS} DESTINATION "include/raptor/ruge_stuben") install(FILES ${aggregation_HEADERS} DESTINATION "include/raptor/aggregation") install(FILES 
${multilevel_HEADERS} DESTINATION "include/raptor/multilevel") @@ -72,12 +68,10 @@ install( if(ENABLE_UNIT_TESTS) add_subdirectory(tests) - add_subdirectory(core/tests) - add_subdirectory(util/tests) + add_subdirectory(precondition/tests) add_subdirectory(ruge_stuben/tests) add_subdirectory(aggregation/tests) add_subdirectory(multilevel/tests) - add_subdirectory(gallery/tests) add_subdirectory(krylov/tests) add_subdirectory(external/tests) endif() diff --git a/raptor/aggregation/aggregate.hpp b/raptor/aggregation/aggregate.hpp index 1c8754e1..dc700657 100644 --- a/raptor/aggregation/aggregate.hpp +++ b/raptor/aggregation/aggregate.hpp @@ -3,8 +3,7 @@ #ifndef RAPTOR_AGGREGATION_AGGREGATE_HPP #define RAPTOR_AGGREGATION_AGGREGATE_HPP -#include "raptor/core/types.hpp" -#include "raptor/core/matrix.hpp" +#include "raptor-sparse.hpp" #include "mis.hpp" namespace raptor { diff --git a/raptor/aggregation/candidates.hpp b/raptor/aggregation/candidates.hpp index d0e79ef1..6b4cbe53 100644 --- a/raptor/aggregation/candidates.hpp +++ b/raptor/aggregation/candidates.hpp @@ -3,8 +3,7 @@ #ifndef RAPTOR_AGGREGATION_CANDIDATES_HPP #define RAPTOR_AGGREGATION_CANDIDATES_HPP -#include "raptor/core/types.hpp" -#include "raptor/core/matrix.hpp" +#include "raptor-sparse.hpp" namespace raptor { // TODO -- currently only accepts constant vector diff --git a/raptor/aggregation/mis.hpp b/raptor/aggregation/mis.hpp index a9417211..2fc14bc0 100644 --- a/raptor/aggregation/mis.hpp +++ b/raptor/aggregation/mis.hpp @@ -3,8 +3,7 @@ #ifndef RAPTOR_AGGREGATION_MIS_HPP #define RAPTOR_AGGREGATION_MIS_HPP -#include "raptor/core/types.hpp" -#include "raptor/core/matrix.hpp" +#include "raptor-sparse.hpp" namespace raptor { diff --git a/raptor/aggregation/par_aggregate.hpp b/raptor/aggregation/par_aggregate.hpp index 9a5ada41..90057eb7 100644 --- a/raptor/aggregation/par_aggregate.hpp +++ b/raptor/aggregation/par_aggregate.hpp @@ -3,8 +3,7 @@ #ifndef RAPTOR_AGGREGATION_PAR_AGGREGATE_HPP #define 
RAPTOR_AGGREGATION_PAR_AGGREGATE_HPP -#include "raptor/core/types.hpp" -#include "raptor/core/par_matrix.hpp" +#include "raptor-sparse.hpp" #include "par_mis.hpp" namespace raptor { diff --git a/raptor/aggregation/par_candidates.hpp b/raptor/aggregation/par_candidates.hpp index d033acae..6404b60a 100644 --- a/raptor/aggregation/par_candidates.hpp +++ b/raptor/aggregation/par_candidates.hpp @@ -3,8 +3,7 @@ #ifndef RAPTOR_AGGREGATION_PAR_CANDIDATES_HPP #define RAPTOR_AGGREGATION_PAR_CANDIDATES_HPP -#include "raptor/core/types.hpp" -#include "raptor/core/par_matrix.hpp" +#include "raptor-sparse.hpp" namespace raptor { // TODO -- currently only accepts constant vector diff --git a/raptor/aggregation/par_mis.hpp b/raptor/aggregation/par_mis.hpp index 33f854eb..0f27054b 100644 --- a/raptor/aggregation/par_mis.hpp +++ b/raptor/aggregation/par_mis.hpp @@ -3,8 +3,7 @@ #ifndef RAPTOR_AGGREGATION_PAR_MIS_HPP #define RAPTOR_AGGREGATION_PAR_MIS_HPP -#include "raptor/core/types.hpp" -#include "raptor/core/par_matrix.hpp" +#include "raptor-sparse.hpp" namespace raptor { diff --git a/raptor/aggregation/par_prolongation.hpp b/raptor/aggregation/par_prolongation.hpp index c349faa5..56b7cd9b 100644 --- a/raptor/aggregation/par_prolongation.hpp +++ b/raptor/aggregation/par_prolongation.hpp @@ -3,9 +3,7 @@ #ifndef RAPTOR_AGGREGATION_PAR_PROLONGATION_HPP #define RAPTOR_AGGREGATION_PAR_PROLONGATION_HPP -#include "raptor/core/types.hpp" -#include "raptor/core/par_matrix.hpp" -#include "raptor/core/par_vector.hpp" +#include "raptor-sparse.hpp" namespace raptor { ParCSRMatrix* jacobi_prolongation(ParCSRMatrix* A, ParCSRMatrix* T, bool tap_comm = false, diff --git a/raptor/aggregation/prolongation.hpp b/raptor/aggregation/prolongation.hpp index 8394b565..386adac9 100644 --- a/raptor/aggregation/prolongation.hpp +++ b/raptor/aggregation/prolongation.hpp @@ -3,9 +3,7 @@ #ifndef RAPTOR_AGGREGATION_PROLONGATION_HPP #define RAPTOR_AGGREGATION_PROLONGATION_HPP -#include "raptor/core/types.hpp" 
-#include "raptor/core/matrix.hpp" -#include "raptor/core/vector.hpp" +#include "raptor-sparse.hpp" namespace raptor { CSRMatrix* jacobi_prolongation(CSRMatrix* A, CSRMatrix* T, double omega = 4.0/3, diff --git a/raptor/core/CMakeLists.txt b/raptor/core/CMakeLists.txt deleted file mode 100644 index 543c32cc..00000000 --- a/raptor/core/CMakeLists.txt +++ /dev/null @@ -1,47 +0,0 @@ -# Include the directory itself as a path to include directories -set(CMAKE_INCLUDE_CURRENT_DIR ON) - -# Create a variable called core_SOURCES containing all .cpp files: -if (WITH_MPI) - set(par_core_HEADERS - core/mpi_types.hpp - core/topology.hpp - core/partition.hpp - core/comm_data.hpp - core/comm_pkg.hpp - core/par_vector.hpp - core/par_matrix.hpp - ) - set(par_core_SOURCES - core/mpi_types.cpp - core/comm_data.cpp - core/tap_comm.cpp - core/comm_pkg.cpp - core/comm_mat.cpp - core/par_vector.cpp - core/par_matrix.cpp - ) -else () - set(par_core_HEADERS - "" - ) - - set (par_core_SOURCES - "" - ) -endif() - -set(core_SOURCES - core/vector.cpp - core/matrix.cpp - ${par_core_SOURCES} - PARENT_SCOPE - ) -set(core_HEADERS - core/types.hpp - core/vector.hpp - core/matrix.hpp - core/utilities.hpp - ${par_core_HEADERS} - PARENT_SCOPE - ) diff --git a/raptor/core/README.md b/raptor/core/README.md deleted file mode 100644 index c10090a9..00000000 --- a/raptor/core/README.md +++ /dev/null @@ -1 +0,0 @@ -vector, matrix, interface, and partition definitions diff --git a/raptor/core/comm_data.cpp b/raptor/core/comm_data.cpp deleted file mode 100644 index afb9c0df..00000000 --- a/raptor/core/comm_data.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "comm_data.hpp" - -namespace raptor -{ -template<> -std::vector& CommData::get_buffer(const int block_size) -{ - return buffer; -} -template<> -std::vector& CommData::get_buffer(const int block_size) -{ - return int_buffer; -} 
-template<> -std::vector& CommData::get_buffer(const int block_size) -{ - return pack_buffer; -} - -template<> -RAPtor_MPI_Datatype CommData::get_type() -{ - return RAPtor_MPI_INT; -} -template<> -RAPtor_MPI_Datatype CommData::get_type() -{ - return RAPtor_MPI_DOUBLE; -} - -template<> -void CommData::send(const int* values, int key, RAPtor_MPI_Comm mpi_comm, const int block_size, - std::function init_result_func, int init_result_func_val) -{ - int_send(values, key, mpi_comm, block_size, init_result_func, - init_result_func_val); -} -template<> -void CommData::send(const double* values, int key, RAPtor_MPI_Comm mpi_comm, const int block_size, - std::function init_result_func, double init_result_func_val) -{ - double_send(values, key, mpi_comm, block_size, init_result_func, - init_result_func_val); -} - -template<> -void CommData::send(const int* values, int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& states, std::function compare_func, - int* n_send_ptr, const int block_size) -{ - int_send(values, key, mpi_comm, states, compare_func, n_send_ptr, block_size); -} -template<> -void CommData::send(const double* values, int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& states, std::function compare_func, - int* n_send_ptr, const int block_size) -{ - double_send(values, key, mpi_comm, states, compare_func, n_send_ptr, block_size); -} - - -template <> -void CommData::recv(int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& off_proc_states, - std::function compare_func, - int* s_recv_ptr, int* n_recv_ptr, const int block_size) -{ - int_recv(key, mpi_comm, off_proc_states, compare_func, - s_recv_ptr, n_recv_ptr, block_size); -} - -template <> -void CommData::recv(int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& off_proc_states, - std::function compare_func, - int* s_recv_ptr, int* n_recv_ptr, const int block_size) -{ - double_recv(key, mpi_comm, off_proc_states, compare_func, - s_recv_ptr, n_recv_ptr, block_size); -} - - - -} diff --git 
a/raptor/core/comm_data.hpp b/raptor/core/comm_data.hpp deleted file mode 100644 index 474af490..00000000 --- a/raptor/core/comm_data.hpp +++ /dev/null @@ -1,1428 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#ifndef RAPTOR_CORE_COMMDATA_HPP -#define RAPTOR_CORE_COMMDATA_HPP - -#define WITH_RAPtor_MPI 1 - -#include -#include "mpi_types.hpp" -#include "vector.hpp" -#include "matrix.hpp" -#include "utilities.hpp" - -/************************************************************** - ***** CommData Class - ************************************************************** - **************************************************************/ -namespace raptor -{ - // Forward Declaration -class CommData -{ -public: - /************************************************************** - ***** CommData Class Constructor - ************************************************************** - ***** Initializes an empty CommData, setting number and size of - ***** messages to zero. 
- - **************************************************************/ - CommData() - { - num_msgs = 0; - size_msgs = 0; - indptr.emplace_back(0); - } - - CommData(CommData* data) - { - num_msgs = data->num_msgs; - size_msgs = data->size_msgs; - std::copy(data->procs.begin(), data->procs.end(), - std::back_inserter(procs)); - std::copy(data->indptr.begin(), data->indptr.end(), - std::back_inserter(indptr)); - - if (num_msgs) - { - requests.resize(num_msgs); - } - - if (size_msgs) - { - buffer.resize(size_msgs); - int_buffer.resize(size_msgs); - } - } - - /************************************************************** - ***** ParComm Class Destructor - ************************************************************** - ***** - **************************************************************/ - virtual ~CommData() - { - }; - - virtual void add_msg(int proc, int msg_size, int* msg_indices = NULL) = 0; - - void finalize() - { - if (num_msgs) - { - requests.resize(num_msgs); - } - if (size_msgs) - { - buffer.resize(size_msgs); - int_buffer.resize(size_msgs); - } - } - - virtual void probe(int size, int key, RAPtor_MPI_Comm mpi_comm) = 0; - - virtual CommData* copy() = 0; - virtual CommData* copy(const std::vector& col_to_new) = 0; - - template - static RAPtor_MPI_Datatype get_type(); - - - template - std::vector& get_buffer(const int block_size = 1); - - template - void send(const T* values, int key, RAPtor_MPI_Comm mpi_comm, const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0); - virtual void int_send(const int* values, int key, RAPtor_MPI_Comm mpi_comm, const int block_size, - std::function init_result_func, - int init_result_func_val) = 0; - virtual void double_send(const double* values, int key, RAPtor_MPI_Comm mpi_comm, const int block_size, - std::function init_result_func, - double init_result_func_val) = 0; - - template - void send(const T* values, int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& states, 
std::function compare_func, - int* n_send_ptr, const int block_size = 1); - virtual void int_send(const int* values, int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& states, std::function compare_func, - int* n_send_ptr, const int block_size) = 0; - virtual void double_send(const double* values, int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& states, std::function compare_func, - int* n_send_ptr, const int block_size) = 0; - - - virtual void send(char* send_buffer, - const int* rowptr, - const int* col_indices, - const double* values, - int key, RAPtor_MPI_Comm mpi_comm, - const int block_size = 1) = 0; - virtual void send(char* send_buffer, - const int* rowptr, - const int* col_indices, - double const* const* values, - int key, RAPtor_MPI_Comm mpi_comm, - const int block_size = 1) = 0; - virtual int get_msg_size(const int* rowptr, - const bool has_vals, RAPtor_MPI_Comm mpi_comm, - const int block_size = 1) = 0; - - - template - void recv(int key, RAPtor_MPI_Comm mpi_comm, const int block_size = 1) - { - if (num_msgs == 0) return; - - int proc, start, end; - int size = size_msgs * block_size; - RAPtor_MPI_Datatype datatype = get_type(); - std::vector& buf = get_buffer(); - if ((int) buf.size() < size) buf.resize(size); - - for (int i = 0; i < num_msgs; i++) - { - proc = procs[i]; - start = indptr[i]; - end = indptr[i+1]; - RAPtor_MPI_Irecv(&(buf[start*block_size]), (end - start) * block_size, datatype, - proc, key, mpi_comm, &(requests[i])); - } - } - - template - void recv(int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& off_proc_states, - std::function compare_func, - int* s_recv_ptr, int* n_recv_ptr, const int block_size = 1); - virtual void int_recv(int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& off_proc_states, - std::function compare_func, - int* s_recv_ptr, int* n_recv_ptr, const int block_size = 1) = 0; - virtual void double_recv(int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& off_proc_states, - std::function 
compare_func, - int* s_recv_ptr, int* n_recv_ptr, const int block_size = 1) = 0; - - void recv(CSRMatrix* recv_mat, int key, RAPtor_MPI_Comm mpi_comm, const int block_size = 1, - const bool vals = true) - { - if (num_msgs == 0) return; - - int proc, start, end, size; - int ctr, row_size, row_count; - int count, recv_size; - RAPtor_MPI_Status recv_status; - std::vector recv_buffer; - - recv_size = 0; - row_count = 0; - for (int i = 0; i < num_msgs; i++) - { - proc = procs[i]; - start = indptr[i]; - end = indptr[i+1]; - size = end - start; - - // Recv message of any size from proc - RAPtor_MPI_Probe(proc, key, mpi_comm, &recv_status); - RAPtor_MPI_Get_count(&recv_status, RAPtor_MPI_PACKED, &count); - - // Resize recv_buffer as needed - if (count > (int) recv_buffer.size()) - { - recv_buffer.resize(count); - } - RAPtor_MPI_Recv(&(recv_buffer[0]), count, RAPtor_MPI_PACKED, proc, key, - mpi_comm, &recv_status); - - // Go through recv, adding indices to matrix recv_mat - ctr = 0; - for (int j = 0; j < size; j++) - { - RAPtor_MPI_Unpack(recv_buffer.data(), count, &ctr, &row_size, 1, RAPtor_MPI_INT, - mpi_comm); - recv_mat->idx1[row_count + 1] = recv_size + row_size; - row_count++; - recv_mat->idx2.resize(recv_size + row_size); - RAPtor_MPI_Unpack(recv_buffer.data(), count, &ctr, &recv_mat->idx2[recv_size], - row_size, RAPtor_MPI_INT, mpi_comm); - - if (vals) - { - if (block_size > 1) - { - BSRMatrix* recv_mat_bsr = (BSRMatrix*) recv_mat; - recv_mat_bsr->block_vals.resize(recv_size + row_size); - for (int k = 0; k < row_size; k++) - { - recv_mat_bsr->block_vals[recv_size + k] = new double[block_size]; - RAPtor_MPI_Unpack(recv_buffer.data(), count, &ctr, - recv_mat_bsr->block_vals[recv_size + k], - block_size, RAPtor_MPI_DOUBLE, mpi_comm); - } - } - else - { - recv_mat->vals.resize(recv_size + row_size); - RAPtor_MPI_Unpack(recv_buffer.data(), count, &ctr, &recv_mat->vals[recv_size], - row_size, RAPtor_MPI_DOUBLE, mpi_comm); - } - } - recv_size += row_size; - } - } - 
recv_mat->nnz = recv_mat->idx2.size(); - } - - - void waitall() - { - if (num_msgs) - { - RAPtor_MPI_Waitall(num_msgs, requests.data(), RAPtor_MPI_STATUSES_IGNORE); - } - } - void waitall(int n_msgs) - { - if (n_msgs) - { - RAPtor_MPI_Waitall(n_msgs, requests.data(), RAPtor_MPI_STATUSES_IGNORE); - } - } - - void pack_values(const double* values, int row_start, int size, char* send_buffer, - int bytes, int* ctr, RAPtor_MPI_Comm mpi_comm, int block_size) - { - RAPtor_MPI_Pack(&(values[row_start]), size, RAPtor_MPI_DOUBLE, send_buffer, - bytes, ctr, mpi_comm); - } - void pack_values(double const* const* values, int row_start, int size, - char* send_buffer, int bytes, int* ctr, RAPtor_MPI_Comm mpi_comm, int block_size) - { - for (int i = 0; i < size; i++) - { - RAPtor_MPI_Pack(values[row_start + i], block_size, RAPtor_MPI_DOUBLE, send_buffer, - bytes, ctr, mpi_comm); - } - } - - template - void unpack(std::vector& unpacked_buffer, RAPtor_MPI_Comm mpi_comm, const int block_size = 1) - { - if (num_msgs == 0) return; - - int position = 0; - int flat_size = size_msgs * block_size; - if (unpacked_buffer.size() < flat_size) unpacked_buffer.resize(flat_size); - RAPtor_MPI_Datatype datatype = get_type(); - RAPtor_MPI_Unpack(pack_buffer.data(), pack_buffer.size(), &position, - unpacked_buffer.data(), flat_size, datatype, mpi_comm); - } - - void reset_buffer() - { - pack_buffer.resize(size_msgs); - } - - int num_msgs; - int size_msgs; - std::vector procs; - std::vector indptr; - std::vector requests; - std::vector buffer; - std::vector int_buffer; - std::vector pack_buffer; - -}; - -class ContigData : public CommData -{ -public: - ContigData() : CommData() - { - } - - ContigData(ContigData* data) : CommData(data) - { - - } - - ~ContigData() - { - } - - ContigData* copy() - { - return new ContigData(this); - } - ContigData* copy(const std::vector& col_to_new) - { - bool comm_proc; - int proc, start, end; - int new_idx; - - ContigData* data = new ContigData(); - - data->size_msgs 
= 0; - for (int i = 0; i < num_msgs; i++) - { - comm_proc = false; - proc = procs[i]; - start = indptr[i]; - end = indptr[i+1]; - for (int j = start; j < end; j++) - { - new_idx = col_to_new[j]; - if (new_idx != -1) - { - comm_proc = true; - data->size_msgs++; - } - } - if (comm_proc) - { - data->procs.emplace_back(proc); - data->indptr.emplace_back(data->size_msgs); - } - } - data->num_msgs = data->procs.size(); - data->finalize(); - - return data; - } - - void add_msg(int proc, int msg_size, int* msg_indices = NULL) - { - int last_ptr = indptr[num_msgs]; - procs.emplace_back(proc); - indptr.emplace_back(last_ptr + msg_size); - - num_msgs++; - size_msgs += msg_size; - } - - void probe(int n_recv, int key, RAPtor_MPI_Comm mpi_comm) - { - int size; - RAPtor_MPI_Status recv_status; - - size_msgs = 0; - indptr[0] = 0; - for (int i = 0; i < n_recv; i++) - { - RAPtor_MPI_Recv(&size, 1, RAPtor_MPI_INT, RAPtor_MPI_ANY_SOURCE, key, - mpi_comm, &recv_status); - procs.emplace_back(recv_status.RAPtor_MPI_SOURCE); - size_msgs += size; - indptr.emplace_back(size_msgs); - } - num_msgs = procs.size(); - finalize(); - } - - - void int_send(const int* values, int key, RAPtor_MPI_Comm mpi_comm, const int block_size, - std::function init_result_func, - int init_result_func_val) - { - send(values, key, mpi_comm, block_size, init_result_func, - init_result_func_val); - } - void double_send(const double* values, int key, RAPtor_MPI_Comm mpi_comm, const int block_size, - std::function init_result_func, - double init_result_func_val) - { - send(values, key, mpi_comm, block_size, init_result_func, - init_result_func_val); - } - - void int_send(const int* values, int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& states, std::function compare_func, - int* n_send_ptr, const int block_size) - { - send(values, key, mpi_comm, states, compare_func, n_send_ptr, block_size); - } - void double_send(const double* values, int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& states, 
std::function compare_func, - int* n_send_ptr, const int block_size) - { - send(values, key, mpi_comm, states, compare_func, n_send_ptr, block_size); - } - - template - void send(const T* values, int key, RAPtor_MPI_Comm mpi_comm, const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - if (num_msgs == 0) return; - - int start, end; - int proc; - - RAPtor_MPI_Datatype datatype = get_type(); - - for (int i = 0; i < num_msgs; i++) - { - proc = procs[i]; - start = indptr[i]; - end = indptr[i+1]; - RAPtor_MPI_Isend(&(values[start*block_size]), (end - start) * block_size, - datatype, proc, key, mpi_comm, &(requests[i])); - } - } - - - template - void send(const T* values, int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& states, std::function compare_func, - int* n_send_ptr, const int block_size = 1) - { - if (num_msgs == 0) - { - *n_send_ptr = 0; - return; - } - - int n_sends; - int proc, start, end, idx; - int ctr, prev_ctr; - bool comparison; - int size = size_msgs * block_size; - - RAPtor_MPI_Datatype datatype = get_type(); - std::vector& buf = get_buffer(); - if ((int)buf.size() < size) buf.resize(size); - - n_sends = 0; - ctr = 0; - prev_ctr = 0; - for (int i = 0; i < num_msgs; i++) - { - proc = procs[i]; - start = indptr[i]; - end = indptr[i+1]; - for (int j = start; j < end; j++) - { - comparison = false; - idx = j * block_size; - for (int k = 0; k < block_size; k++) - { - if (compare_func(states[idx + k])) - { - comparison = true; - break; - } - } - if (comparison) - { - for (int k = 0; k < block_size; k++) - { - buf[ctr++] = values[idx+k]; - } - } - } - size = ctr - prev_ctr; - if (size) - { - RAPtor_MPI_Isend(&(buf[prev_ctr]), size, datatype, - proc, key, mpi_comm, &(requests[n_sends++])); - prev_ctr = ctr; - } - } - *n_send_ptr = n_sends; - } - - void send(char* send_buffer, - const int* rowptr, - const int* col_indices, - const double* values, - int key, RAPtor_MPI_Comm mpi_comm, - const int 
block_size = 1) - { - send_helper(send_buffer, rowptr, col_indices, values, key, - mpi_comm, block_size); - } - - void send(char* send_buffer, - const int* rowptr, - const int* col_indices, - double const* const* values, - int key, RAPtor_MPI_Comm mpi_comm, - const int block_size = 1) - { - send_helper(send_buffer, rowptr, col_indices, values, key, - mpi_comm, block_size); - } - - int get_msg_size(const int* rowptr, const bool has_vals, RAPtor_MPI_Comm mpi_comm, - const int block_size = 1) - { - int start, end; - int row_start, row_end; - int num_ints, num_doubles; - int double_bytes, bytes; - - // Calculate total msg size - start = indptr[0]; - end = indptr[num_msgs]; - row_start = rowptr[start]; - row_end = rowptr[end]; - num_ints = (row_end - row_start) + (end - start); - num_doubles = (row_end - row_start) * block_size; - RAPtor_MPI_Pack_size(num_ints, RAPtor_MPI_INT, mpi_comm, &bytes); - - if (has_vals) - { - RAPtor_MPI_Pack_size(num_doubles, RAPtor_MPI_DOUBLE, mpi_comm, &double_bytes); - bytes += double_bytes; - } - - return bytes; - } - - // values can be double* (CSRMatrix) or double** (BSRMatrix) - template - void send_helper(char* send_buffer, - const int* rowptr, - const int* col_indices, - const T& values, - int key, RAPtor_MPI_Comm mpi_comm, - const int block_size = 1) - { - if (num_msgs == 0) return; - - int start, end, proc; - int ctr, prev_ctr, size; - int row_start, row_end; - int bytes; - - bytes = get_msg_size(rowptr, values, mpi_comm, block_size); - - ctr = 0; - prev_ctr = 0; - for (int i = 0; i < num_msgs; i++) - { - proc = procs[i]; - start = indptr[i]; - end = indptr[i+1]; - for (int j = start; j < end; j++) - { - row_start = rowptr[j]; - row_end = rowptr[j+1]; - size = row_end - row_start; - RAPtor_MPI_Pack(&size, 1, RAPtor_MPI_INT, send_buffer, bytes, - &ctr, mpi_comm); - RAPtor_MPI_Pack(&(col_indices[row_start]), size, RAPtor_MPI_INT, - send_buffer, bytes, &ctr, mpi_comm); - if (values) - { - pack_values(values, row_start, size, 
send_buffer, bytes, - &ctr, mpi_comm, block_size); - } - } - RAPtor_MPI_Isend(&(send_buffer[prev_ctr]), ctr - prev_ctr, RAPtor_MPI_PACKED, proc, - key, mpi_comm, &(requests[i])); - prev_ctr = ctr; - } - } - - void int_recv(int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& off_proc_states, - std::function compare_func, - int* s_recv_ptr, int* n_recv_ptr, const int block_size = 1) - { - cond_recv(key, mpi_comm, off_proc_states, compare_func, s_recv_ptr, - n_recv_ptr, block_size); - } - void double_recv(int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& off_proc_states, - std::function compare_func, - int* s_recv_ptr, int* n_recv_ptr, const int block_size = 1) - { - cond_recv(key, mpi_comm, off_proc_states, compare_func, s_recv_ptr, - n_recv_ptr, block_size); - } - - template - void cond_recv(int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& off_proc_states, - std::function compare_func, - int* s_recv_ptr, int* n_recv_ptr, const int block_size = 1) - { - if (num_msgs == 0) - { - *s_recv_ptr = 0; - *n_recv_ptr = 0; - return; - } - - int n_recvs, ctr, prev_ctr; - int proc, start, end, idx; - int size = size_msgs * block_size; - - RAPtor_MPI_Datatype datatype = get_type(); - std::vector& buf = get_buffer(); - if ((int)buf.size() < size) buf.resize(size); - - n_recvs = 0; - ctr = 0; - prev_ctr = 0; - for (int i = 0; i < num_msgs; i++) - { - proc = procs[i]; - start = indptr[i]; - end = indptr[i+1]; - for (int j = start; j < end; j++) - { - idx = j * block_size; - for (int k = 0; k < block_size; k++) - { - if (compare_func(off_proc_states[idx + k])) - { - ctr += block_size; - break; - } - } - } - if (ctr - prev_ctr) - { - RAPtor_MPI_Irecv(&(buf[prev_ctr]), ctr - prev_ctr, datatype, - proc, key, mpi_comm, &(requests[n_recvs++])); - prev_ctr = ctr; - } - } - - *n_recv_ptr = n_recvs; - *s_recv_ptr = ctr; - } - -}; - -class NonContigData : public CommData -{ -public: - NonContigData() : CommData() - { - } - - NonContigData(NonContigData* data) : CommData(data) 
- { - std::copy(data->indices.begin(), data->indices.end(), - std::back_inserter(indices)); - } - - ~NonContigData() - { - } - - NonContigData* copy() - { - return new NonContigData(this); - } - - NonContigData* copy(const std::vector& col_to_new) - { - bool comm_proc; - int proc, start, end; - int idx, new_idx; - - NonContigData* data = new NonContigData(); - - data->size_msgs = 0; - for (int i = 0; i < num_msgs; i++) - { - comm_proc = false; - proc = procs[i]; - start = indptr[i]; - end = indptr[i+1]; - for (int j = start; j < end; j++) - { - idx = indices[j]; - new_idx = col_to_new[idx]; - if (new_idx != -1) - { - comm_proc = true; - data->indices.emplace_back(new_idx); - } - } - if (comm_proc) - { - data->procs.emplace_back(proc); - data->indptr.emplace_back(data->indices.size()); - } - } - data->size_msgs = data->indices.size(); - data->num_msgs = data->procs.size(); - data->finalize(); - - return data; - } - - void add_msg(int proc, - int msg_size, - int* msg_indices = NULL) - { - int last_ptr = indptr[num_msgs]; - procs.emplace_back(proc); - indptr.emplace_back(last_ptr + msg_size); - if (msg_indices) - { - for (int i = 0; i < msg_size; i++) - { - indices.emplace_back(msg_indices[i]); - } - } - - num_msgs++; - size_msgs += msg_size; - } - - void probe(int size, int key, RAPtor_MPI_Comm mpi_comm) - { - int proc, count; - int size_recvd; - RAPtor_MPI_Status recv_status; - - size_msgs = size; - indices.resize(size_msgs); - indptr[0] = 0; - size_recvd = 0; - while (size_recvd < size_msgs) - { - RAPtor_MPI_Probe(RAPtor_MPI_ANY_SOURCE, key, mpi_comm, &recv_status); - proc = recv_status.RAPtor_MPI_SOURCE; - RAPtor_MPI_Get_count(&recv_status, RAPtor_MPI_INT, &count); - RAPtor_MPI_Recv(&(indices[size_recvd]), count, RAPtor_MPI_INT, proc, - key, mpi_comm, &recv_status); - size_recvd += count; - procs.emplace_back(proc); - indptr.emplace_back(size_recvd); - } - num_msgs = procs.size(); - finalize(); - } - - void int_send(const int* values, int key, RAPtor_MPI_Comm 
mpi_comm, const int block_size, - std::function init_result_func, - int init_result_func_val) - { - send(values, key, mpi_comm, block_size, init_result_func, - init_result_func_val); - } - void double_send(const double* values, int key, RAPtor_MPI_Comm mpi_comm, const int block_size, - std::function init_result_func, - double init_result_func_val) - { - send(values, key, mpi_comm, block_size, init_result_func, - init_result_func_val); - } - void int_send(const int* values, int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& states, std::function compare_func, - int* n_send_ptr, const int block_size) - { - send(values, key, mpi_comm, states, compare_func, n_send_ptr, block_size); - } - void double_send(const double* values, int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& states, std::function compare_func, - int* n_send_ptr, const int block_size) - { - send(values, key, mpi_comm, states, compare_func, n_send_ptr, block_size); - } - - template - void send(const T* values, int key, RAPtor_MPI_Comm mpi_comm, const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - if (num_msgs == 0) return; - - - int start, end; - int proc, idx, pos; - int size = size_msgs * block_size; - - RAPtor_MPI_Datatype datatype = get_type(); - std::vector& buf = get_buffer(); - if ((int)buf.size() < size) buf.resize(size); - - for (int i = 0; i < num_msgs; i++) - { - proc = procs[i]; - start = indptr[i]; - end = indptr[i+1]; - for (int j = start; j < end; j++) - { - idx = indices[j] * block_size; - pos = j * block_size; - for (int k = 0; k < block_size; k++) - { - buf[pos + k] = values[idx + k]; - } - } - RAPtor_MPI_Isend(&(buf[start*block_size]), (end - start) * block_size, - datatype, proc, key, mpi_comm, &(requests[i])); - } - } - - template - void send(const T* values, int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& states, std::function compare_func, - int* n_send_ptr, const int block_size = 1) - { - if (num_msgs == 0) 
- { - *n_send_ptr = 0; - return; - } - - int n_sends; - int proc, start, end; - int idx; - int ctr, prev_ctr; - bool comparison; - int size = size_msgs * block_size; - - RAPtor_MPI_Datatype datatype = get_type(); - std::vector& buf = get_buffer(); - if ((int)buf.size() < size) buf.resize(size); - - n_sends = 0; - ctr = 0; - prev_ctr = 0; - for (int i = 0; i < num_msgs; i++) - { - proc = procs[i]; - start = indptr[i]; - end = indptr[i+1]; - for (int j = start; j < end; j++) - { - idx = indices[j] * block_size; - comparison = false; - for (int k = 0; k < block_size; k++) - { - // If compare true for any idx in block - // Add full block to message - if (compare_func(states[idx + k])) - { - comparison = true; - break; - } - } - if (comparison) - { - for (int k = 0; k < block_size; k++) - { - buf[ctr++] = values[idx+k]; - } - } - } - if (ctr - prev_ctr) - { - RAPtor_MPI_Isend(&(buf[prev_ctr]), ctr - prev_ctr, datatype, - proc, key, mpi_comm, &(requests[n_sends++])); - prev_ctr = ctr; - } - } - - *n_send_ptr = n_sends; - } - - void send(char* send_buffer, - const int* rowptr, - const int* col_indices, - const double* values, - int key, RAPtor_MPI_Comm mpi_comm, - const int block_size = 1) - { - send_helper(send_buffer, rowptr, col_indices, values, key, - mpi_comm, block_size); - } - - void send(char* send_buffer, - const int* rowptr, - const int* col_indices, - double const* const* values, - int key, RAPtor_MPI_Comm mpi_comm, - const int block_size = 1) - { - send_helper(send_buffer, rowptr, col_indices, values, key, - mpi_comm, block_size); - } - - int get_msg_size(const int* rowptr, const bool has_vals, RAPtor_MPI_Comm mpi_comm, - const int block_size = 1) - { - int num_ints, num_doubles; - int double_bytes, bytes; - - // Calculate message size - num_ints = indptr[num_msgs] - indptr[0]; - num_doubles = 0; - for (std::vector::iterator it = indices.begin(); - it != indices.end(); ++it) - { - num_doubles += (rowptr[*it+1] - rowptr[*it]); - } - num_ints += num_doubles; - 
RAPtor_MPI_Pack_size(num_ints, RAPtor_MPI_INT, mpi_comm, &bytes); - - if (has_vals) - { - RAPtor_MPI_Pack_size(num_doubles * block_size, RAPtor_MPI_DOUBLE, mpi_comm, &double_bytes); - bytes += double_bytes; - } - - return bytes; - } - - template - void send_helper(char* send_buffer, - const int* rowptr, - const int* col_indices, - const T& values, - int key, RAPtor_MPI_Comm mpi_comm, - const int block_size = 1) - { - if (num_msgs == 0) return; - - int start, end, proc; - int ctr, prev_ctr, size; - int row, row_start, row_end; - int bytes; - - // Resize send buffer - bytes = get_msg_size(rowptr, values, mpi_comm, block_size); - - ctr = 0; - prev_ctr = 0; - for (int i = 0; i < num_msgs; i++) - { - proc = procs[i]; - start = indptr[i]; - end = indptr[i+1]; - for (int j = start; j < end; j++) - { - row = indices[j]; - row_start = rowptr[row]; - row_end = rowptr[row+1]; - size = (row_end - row_start); - RAPtor_MPI_Pack(&size, 1, RAPtor_MPI_INT, send_buffer, bytes, - &ctr, mpi_comm); - RAPtor_MPI_Pack(&(col_indices[row_start]), size, RAPtor_MPI_INT, - send_buffer, bytes, &ctr, mpi_comm); - if (values) - { - pack_values(values, row_start, size, send_buffer, bytes, &ctr, - mpi_comm, block_size); - } - } - RAPtor_MPI_Isend(&(send_buffer[prev_ctr]), ctr - prev_ctr, RAPtor_MPI_PACKED, proc, - key, mpi_comm, &(requests[i])); - prev_ctr = ctr; - } - } - - - void int_recv(int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& off_proc_states, - std::function compare_func, - int* s_recv_ptr, int* n_recv_ptr, const int block_size = 1) - { - cond_recv(key, mpi_comm, off_proc_states, compare_func, s_recv_ptr, - n_recv_ptr, block_size); - } - void double_recv(int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& off_proc_states, - std::function compare_func, - int* s_recv_ptr, int* n_recv_ptr, const int block_size = 1) - { - cond_recv(key, mpi_comm, off_proc_states, compare_func, s_recv_ptr, - n_recv_ptr, block_size); - } - - template - void cond_recv(int key, RAPtor_MPI_Comm 
mpi_comm, - const std::vector& off_proc_states, - std::function compare_func, - int* s_recv_ptr, int* n_recv_ptr, const int block_size = 1) - { - if (num_msgs == 0) - { - *s_recv_ptr = 0; - *n_recv_ptr = 0; - return; - } - - int n_recvs, ctr, prev_ctr; - int proc, start, end, idx; - int size = size_msgs * block_size; - - RAPtor_MPI_Datatype datatype = get_type(); - std::vector& buf = get_buffer(); - if ((int)buf.size() < size) buf.resize(size); - - n_recvs = 0; - ctr = 0; - prev_ctr = 0; - for (int i = 0; i < num_msgs; i++) - { - proc = procs[i]; - start = indptr[i]; - end = indptr[i+1]; - for (int j = start; j < end; j++) - { - idx = indices[j] * block_size; - for (int k = 0; k < block_size; k++) - { - if (compare_func(off_proc_states[idx + k])) - { - ctr += block_size; - break; - } - } - } - if (ctr - prev_ctr) - { - RAPtor_MPI_Irecv(&(buf[prev_ctr]), ctr - prev_ctr, datatype, proc, - key, mpi_comm, &(requests[n_recvs++])); - prev_ctr = ctr; - } - } - - *n_recv_ptr = n_recvs; - *s_recv_ptr = ctr; - } - - std::vector indices; - -}; - -class DuplicateData : public NonContigData -{ -public: - DuplicateData() : NonContigData() - { - } - - DuplicateData(DuplicateData* data) : NonContigData(data) - { - std::copy(data->indptr_T.begin(), data->indptr_T.end(), - std::back_inserter(indptr_T)); - } - - ~DuplicateData() - { - } - - DuplicateData* copy() - { - return new DuplicateData(this); - } - DuplicateData* copy(const std::vector& col_to_new) - { - bool comm_proc, comm_idx; - int proc, start, end; - int idx, new_idx; - int idx_start, idx_end; - - DuplicateData* data = new DuplicateData(); - - data->indptr_T.emplace_back(0); - for (int i = 0; i < num_msgs; i++) - { - comm_proc = false; - proc = procs[i]; - start = indptr[i]; - end = indptr[i+1]; - for (int j = start; j < end; j++) - { - comm_idx = false; - idx_start = indptr_T[j]; - idx_end = indptr_T[j+1]; - for (int k = idx_start; k < idx_end; k++) - { - idx = indices[k]; - new_idx = col_to_new[idx]; - if (new_idx != 
-1) - { - comm_idx = true; - data->indices.emplace_back(new_idx); - } - } - if (comm_idx) - { - comm_proc = true; - data->indptr_T.emplace_back(data->indices.size()); - } - } - if (comm_proc) - { - data->procs.emplace_back(proc); - data->indptr.emplace_back(data->indptr_T.size() - 1); - } - } - data->size_msgs = data->indptr_T.size() - 1; - data->num_msgs = data->procs.size(); - data->finalize(); - - return data; - } - - void int_send(const int* values, int key, RAPtor_MPI_Comm mpi_comm, const int block_size, - std::function init_result_func, - int init_result_func_val) - { - send(values, key, mpi_comm, block_size, init_result_func, - init_result_func_val); - } - void double_send(const double* values, int key, RAPtor_MPI_Comm mpi_comm, const int block_size, - std::function init_result_func, - double init_result_func_val) - { - send(values, key, mpi_comm, block_size, init_result_func, - init_result_func_val); - } - void int_send(const int* values, int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& states, std::function compare_func, - int* n_send_ptr, const int block_size) - { - send(values, key, mpi_comm, states, compare_func, n_send_ptr, block_size); - } - void double_send(const double* values, int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& states, std::function compare_func, - int* n_send_ptr, const int block_size) - { - send(values, key, mpi_comm, states, compare_func, n_send_ptr, block_size); - } - - template - void send(const T* values, int key, RAPtor_MPI_Comm mpi_comm, const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - if (num_msgs == 0) return; - - - int start, end; - int proc, idx; - int idx_start, idx_end; - int size = size_msgs * block_size; - int pos; - - RAPtor_MPI_Datatype datatype = get_type(); - std::vector& buf = get_buffer(); - if ((int)buf.size() < size) buf.resize(size); - - std::vector tmp(block_size); - - for (int i = 0; i < num_msgs; i++) - { - proc = procs[i]; - start = 
indptr[i]; - end = indptr[i+1]; - for (int j = start; j < end; j++) - { - idx_start = indptr_T[j]; - idx_end = indptr_T[j+1]; - std::fill(tmp.begin(), tmp.end(), init_result_func_val); - for (int k = idx_start; k < idx_end; k++) - { - idx = indices[k] * block_size; - for (int l = 0; l < block_size; l++) - { - tmp[l] = init_result_func(tmp[l], values[idx+l]); - } - } - pos = j * block_size; - for (int k = 0; k < block_size; k++) - { - buf[pos + k] = tmp[k]; - } - } - RAPtor_MPI_Isend(&(buf[start * block_size]), (end - start) * block_size, - datatype, proc, key, mpi_comm, &(requests[i])); - } - } - - template - void send(const T* values, int key, RAPtor_MPI_Comm mpi_comm, - const std::vector& states, std::function compare_func, - int* n_send_ptr, const int block_size = 1) - { - - } - - void append_val(std::vector& vec, const double val, int block_size) - { - vec.emplace_back(val); - } - void append_val(std::vector& vec, const double* val, int block_size) - { - for (int i = 0; i < block_size; i++) - vec.emplace_back(val[i]); - } - - template - void combine_entries(int j, const int* rowptr, const int* col_indices, - const T& values, int block_size, std::vector& send_indices, - std::vector& send_values, int* size_ptr) - { - int idx_start, idx_end; - int row_start, row_end; - int size, row, idx, ctr; - - idx_start = indptr_T[j]; - idx_end = indptr_T[j+1]; - for (int k = idx_start; k < idx_end; k++) - { - row = indices[k]; - row_start = rowptr[row]; - row_end = rowptr[row+1]; - for (int l = row_start; l < row_end; l++) - { - send_indices.emplace_back(col_indices[l]); - append_val(send_values, values[l], block_size); - } - } - if (send_indices.size()) - { - vec_sort(send_indices, send_values); - size = 1; - - int s_send = send_indices.size(); - for (int k = 1; k < s_send; k++) - { - ctr = k * block_size; - if (send_indices[k] != send_indices[size - 1]) - { - idx = size * block_size; - for (int i = 0; i < block_size; i++) - { - send_values[idx + i] = send_values[ctr + i]; - 
} - send_indices[size++] = send_indices[k]; - } - else - { - idx = (size - 1) * block_size; - for (int i = 0; i < block_size; i++) - { - send_values[idx + i] += send_values[ctr + i]; - } - } - } - } - else size = 0; - - *size_ptr = size; - } - - void combine_entries(int j, const int* rowptr, const int* col_indices, - std::vector& send_indices, int* size_ptr) - { - int idx_start, idx_end; - int row_start, row_end; - int size, row; - - idx_start = indptr_T[j]; - idx_end = indptr_T[j+1]; - for (int k = idx_start; k < idx_end; k++) - { - row = indices[k]; - row_start = rowptr[row]; - row_end = rowptr[row+1]; - for (int l = row_start; l < row_end; l++) - { - send_indices.emplace_back(col_indices[l]); - } - } - if (send_indices.size()) - { - size = 1; - std::sort(send_indices.begin(), send_indices.end()); - int s_send = send_indices.size(); - for (int k = 1; k < s_send; k++) - { - if (send_indices[k] != send_indices[size - 1]) - { - send_indices[size++] = send_indices[k]; - } - } - } - else size = 0; - - *size_ptr = size; - } - - - // TODO -- how to communicate block matrices? 
- // - void send(char* send_buffer, - const int* rowptr, - const int* col_indices, - const double* values, - int key, RAPtor_MPI_Comm mpi_comm, - const int block_size = 1) - { - send_helper(send_buffer, rowptr, col_indices, values, key, mpi_comm, block_size); - } - void send(char* send_buffer, - const int* rowptr, - const int* col_indices, - double const* const* values, - int key, RAPtor_MPI_Comm mpi_comm, - const int block_size = 1) - { - send_helper(send_buffer, rowptr, col_indices, values, key, mpi_comm, block_size); - } - - int get_msg_size(const int* rowptr, const bool has_vals, RAPtor_MPI_Comm mpi_comm, - const int block_size = 1) - { - int num_ints, num_doubles; - int double_bytes, bytes; - - // Calculate message size (upper bound) - num_ints = indptr[num_msgs] - indptr[0]; - num_doubles = 0; - for (std::vector::iterator it = indices.begin(); - it != indices.end(); ++it) - { - num_doubles += (rowptr[*it+1] - rowptr[*it]); - } - num_ints += num_doubles; - RAPtor_MPI_Pack_size(num_ints, RAPtor_MPI_INT, mpi_comm, &bytes); - if (has_vals) - { - RAPtor_MPI_Pack_size(num_doubles * block_size, RAPtor_MPI_DOUBLE, mpi_comm, &double_bytes); - bytes += double_bytes; - } - - return bytes; - } - - template - void send_helper(char* send_buffer, - const int* rowptr, - const int* col_indices, - const T& values, - int key, RAPtor_MPI_Comm mpi_comm, - const int block_size = 1) - { - if (num_msgs == 0) return; - - int start, end, proc; - int ctr, prev_ctr, size; - int bytes; - - // Resize send buffer - bytes = get_msg_size(rowptr, values, mpi_comm, block_size); - - ctr = 0; - prev_ctr = 0; - for (int i = 0; i < num_msgs; i++) - { - proc = procs[i]; - start = indptr[i]; - end = indptr[i+1]; - for (int j = start; j < end; j++) - { - std::vector send_indices; - std::vector send_values; - - if (values) - { - combine_entries(j, rowptr, col_indices, values, block_size, - send_indices, send_values, &size); - } - else - { - combine_entries(j, rowptr, col_indices, send_indices, &size); 
- } - RAPtor_MPI_Pack(&size, 1, RAPtor_MPI_INT, send_buffer, bytes, &ctr, mpi_comm); - RAPtor_MPI_Pack(send_indices.data(), size, RAPtor_MPI_INT, send_buffer, - bytes, &ctr, mpi_comm); - - if (values) - { - pack_values(send_values.data(), 0, size, send_buffer, bytes, &ctr, - mpi_comm, block_size); - } - } - RAPtor_MPI_Isend(&(send_buffer[prev_ctr]), ctr - prev_ctr, RAPtor_MPI_PACKED, proc, - key, mpi_comm, &(requests[i])); - prev_ctr = ctr; - } - } - - std::vector indptr_T; - -}; - -} -#endif - diff --git a/raptor/core/comm_mat.cpp b/raptor/core/comm_mat.cpp deleted file mode 100644 index d1fa40da..00000000 --- a/raptor/core/comm_mat.cpp +++ /dev/null @@ -1,810 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "comm_pkg.hpp" -#include "par_matrix.hpp" - -using namespace raptor; - -// Forward Declarations - -// Helper Methods -template std::vector& create_mat(int n, int m, int b_n, int b_m, - CSRMatrix** mat_ptr); -template CSRMatrix* communication_helper(const int* rowptr, - const int* col_indices, const T& values, - CommData* send_comm, CommData* recv_comm, int key, RAPtor_MPI_Comm mpi_comm, - const int b_rows, const int b_cols, const bool has_vals = true); -template void init_comm_helper(char* send_buffer, - const int* rowptr, const int* col_indices, const T& values, - CommData* send_comm, int key, RAPtor_MPI_Comm mpi_comm, const int b_rows, - const int b_cols); -CSRMatrix* complete_comm_helper(CommData* send_comm, - CommData* recv_comm, int key, RAPtor_MPI_Comm mpi_comm, const int b_rows, - const int b_cols, const bool has_vals = true); - -template CSRMatrix* transpose_recv(CSRMatrix* recv_mat_T, - std::vector& T_vals, NonContigData* send_data, int n); -template CSRMatrix* combine_recvs(CSRMatrix* L_mat, CSRMatrix* R_mat, - std::vector& L_vals, std::vector& R_vals, const int b_rows, - const int b_cols, NonContigData* local_L_recv, NonContigData* local_R_recv, - 
std::vector& row_sizes); -template CSRMatrix* combine_recvs_T(CSRMatrix* L_mat, - CSRMatrix* final_mat, NonContigData* local_L_send, NonContigData* final_send, - std::vector& L_vals, std::vector& final_vals, int n, - int b_rows, int b_cols); - - -// Main Methods -CSRMatrix* CommPkg::communicate(ParCSRMatrix* A, const bool has_vals) -{ - std::vector send_buffer; - init_par_mat_comm(A, send_buffer, has_vals); - return complete_mat_comm(A->on_proc->b_rows, A->on_proc->b_cols, - has_vals); -} -CSRMatrix* CommPkg::communicate(ParBSRMatrix* A, const bool has_vals) -{ - std::vector send_buffer; - init_par_mat_comm(A, send_buffer, has_vals); - return complete_mat_comm(A->on_proc->b_rows, A->on_proc->b_cols, - has_vals); -} -void CommPkg::init_par_mat_comm(ParCSRMatrix* A, std::vector& send_buffer, - const bool has_vals) -{ - int start, end; - int ctr; - int global_col; - - int nnz = A->on_proc->nnz + A->off_proc->nnz; - std::vector rowptr(A->local_num_rows + 1); - std::vector col_indices; - std::vector values; - if (nnz) - { - col_indices.resize(nnz); - if (has_vals) - values.resize(nnz); - } - - ctr = 0; - rowptr[0] = ctr; - for (int i = 0; i < A->local_num_rows; i++) - { - start = A->on_proc->idx1[i]; - end = A->on_proc->idx1[i+1]; - for (int j = start; j < end; j++) - { - global_col = A->on_proc_column_map[A->on_proc->idx2[j]]; - if (has_vals) values[ctr] = A->on_proc->vals[j]; - col_indices[ctr++] = global_col; - } - - start = A->off_proc->idx1[i]; - end = A->off_proc->idx1[i+1]; - for (int j = start; j < end; j++) - { - global_col = A->off_proc_column_map[A->off_proc->idx2[j]]; - if (has_vals) values[ctr] = A->off_proc->vals[j]; - col_indices[ctr++] = global_col; - } - rowptr[i+1] = ctr; - } - return init_mat_comm(send_buffer, rowptr, col_indices, values, - A->on_proc->b_rows, A->on_proc->b_cols, has_vals); -} -void CommPkg::init_par_mat_comm(ParBSRMatrix* A, std::vector& send_buffer, - const bool has_vals) -{ - int start, end; - int ctr; - int global_col; - - int nnz 
= A->on_proc->nnz + A->off_proc->nnz; - std::vector rowptr(A->local_num_rows + 1); - std::vector col_indices; - std::vector values; - if (nnz) - { - col_indices.resize(nnz); - if (has_vals) - values.resize(nnz); - } - - BSRMatrix* A_on = (BSRMatrix*) A->on_proc; - BSRMatrix* A_off = (BSRMatrix*) A->off_proc; - - ctr = 0; - rowptr[0] = ctr; - for (int i = 0; i < A->local_num_rows; i++) - { - start = A->on_proc->idx1[i]; - end = A->on_proc->idx1[i+1]; - for (int j = start; j < end; j++) - { - global_col = A->on_proc_column_map[A->on_proc->idx2[j]]; - if (has_vals) values[ctr] = A->on_proc->copy_val(A_on->block_vals[j]); - col_indices[ctr++] = global_col; - } - - start = A->off_proc->idx1[i]; - end = A->off_proc->idx1[i+1]; - for (int j = start; j < end; j++) - { - global_col = A->off_proc_column_map[A->off_proc->idx2[j]]; - if (has_vals) values[ctr] = A->off_proc->copy_val(A_off->block_vals[j]); - col_indices[ctr++] = global_col; - } - rowptr[i+1] = ctr; - } - return init_mat_comm(send_buffer, rowptr, col_indices, values, - A->on_proc->b_rows, A->on_proc->b_cols, has_vals); -} - -CSRMatrix* ParComm::communicate(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows, const int b_cols, const bool has_vals) -{ - std::vector send_buffer; - init_mat_comm(send_buffer, rowptr, col_indices, values, b_rows, b_cols, has_vals); - return complete_mat_comm(b_rows, b_cols, has_vals); -} -CSRMatrix* ParComm::communicate(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows, const int b_cols, const bool has_vals) -{ - std::vector send_buffer; - init_mat_comm(send_buffer, rowptr, col_indices, values, b_rows, b_cols, has_vals); - return complete_mat_comm(b_rows, b_cols, has_vals); -} - -void ParComm::init_mat_comm(std::vector& send_buffer, - const std::vector& rowptr, const std::vector& col_indices, - const std::vector& values, const int b_rows, const int b_cols, - const bool 
has_vals) -{ - int s = send_data->get_msg_size(rowptr.data(), values.data(), mpi_comm, b_rows * b_cols); - send_buffer.resize(s); - init_comm_helper(send_buffer.data(), rowptr.data(), col_indices.data(), values.data(), - send_data, key, mpi_comm, b_rows, b_cols); -} -void ParComm::init_mat_comm(std::vector& send_buffer, - const std::vector& rowptr, const std::vector& col_indices, - const std::vector& values, const int b_rows, const int b_cols, - const bool has_vals) -{ - int s = send_data->get_msg_size(rowptr.data(), values.data(), mpi_comm, b_rows * b_cols); - send_buffer.resize(s); - init_comm_helper(send_buffer.data(), rowptr.data(), col_indices.data(), values.data(), - send_data, key, mpi_comm, b_rows, b_cols); -} - -CSRMatrix* ParComm::complete_mat_comm(const int b_rows, const int b_cols, - const bool has_vals) -{ - CSRMatrix* recv_mat = complete_comm_helper(send_data, recv_data, key, mpi_comm, - b_rows, b_cols, has_vals); - key++; - return recv_mat; -} - - -CSRMatrix* ParComm::communicate_T(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int n_result_rows, const int b_rows, const int b_cols, const bool has_vals) -{ - std::vector send_buffer; - init_mat_comm_T(send_buffer, rowptr, col_indices, values, b_rows, b_cols, has_vals); - return complete_mat_comm_T(n_result_rows, b_rows, b_cols, has_vals); -} -CSRMatrix* ParComm::communicate_T(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int n_result_rows, const int b_rows, const int b_cols, const bool has_vals) -{ - std::vector send_buffer; - init_mat_comm_T(send_buffer, rowptr, col_indices, values, b_rows, b_cols, has_vals); - return complete_mat_comm_T(n_result_rows, b_rows, b_cols, has_vals); -} -void ParComm::init_mat_comm_T(std::vector& send_buffer, const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows, const int b_cols, const bool has_vals) -{ - int s = 
recv_data->get_msg_size(rowptr.data(), values.data(), mpi_comm, b_rows * b_cols); - send_buffer.resize(s); - init_comm_helper(send_buffer.data(), rowptr.data(), col_indices.data(), values.data(), - recv_data, key, mpi_comm, b_rows, b_cols); -} -void ParComm::init_mat_comm_T(std::vector& send_buffer, const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows, const int b_cols, const bool has_vals) -{ - int s = recv_data->get_msg_size(rowptr.data(), values.data(), mpi_comm, b_rows * b_cols); - send_buffer.resize(s); - init_comm_helper(send_buffer.data(), rowptr.data(), col_indices.data(), values.data(), - recv_data, key, mpi_comm, b_rows, b_cols); -} -CSRMatrix* ParComm::complete_mat_comm_T(const int n_result_rows, const int b_rows, const int b_cols, const bool has_vals) -{ - CSRMatrix* recv_mat_T = complete_comm_helper(recv_data, send_data, key, mpi_comm, - b_rows, b_cols, has_vals); - - CSRMatrix* recv_mat; - if (b_rows > 1 || b_cols > 1) - { - BSRMatrix* recv_mat_T_bsr = (BSRMatrix*) recv_mat_T; - recv_mat = transpose_recv(recv_mat_T_bsr, recv_mat_T_bsr->block_vals, - send_data, n_result_rows); - } - else - { - recv_mat = transpose_recv(recv_mat_T, recv_mat_T->vals, - send_data, n_result_rows); - } - - delete recv_mat_T; - return recv_mat; -} - - - - - - -CSRMatrix* TAPComm::communicate(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows, const int b_cols, const bool has_vals) -{ - std::vector send_buffer; - init_mat_comm(send_buffer, rowptr, col_indices, values, b_rows, b_cols, has_vals); - return complete_mat_comm(b_rows, b_cols, has_vals); -} - -CSRMatrix* TAPComm::communicate(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows, const int b_cols, const bool has_vals) -{ - std::vector send_buffer; - init_mat_comm(send_buffer, rowptr, col_indices, values, b_rows, b_cols, has_vals); - return 
complete_mat_comm(b_rows, b_cols, has_vals); -} -void TAPComm::init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows, const int b_cols, const bool has_vals) -{ - int block_size = b_rows * b_cols; - int l_bytes = local_L_par_comm->send_data->get_msg_size(rowptr.data(), - values.data(), local_L_par_comm->mpi_comm, block_size); - int g_bytes; - - if (local_S_par_comm) - { - CSRMatrix* S_mat = local_S_par_comm->communicate(rowptr, col_indices, values, - b_rows, b_cols, has_vals); - g_bytes = global_par_comm->send_data->get_msg_size(S_mat->idx1.data(), - S_mat->vals.data(), global_par_comm->mpi_comm, block_size); - send_buffer.resize(l_bytes + g_bytes); - - init_comm_helper(&(send_buffer[0]), S_mat->idx1.data(), - S_mat->idx2.data(), S_mat->vals.data(), global_par_comm->send_data, - global_par_comm->key, global_par_comm->mpi_comm, b_rows, b_cols); - delete S_mat; - } - else - { - g_bytes = global_par_comm->send_data->get_msg_size(rowptr.data(), - values.data(), global_par_comm->mpi_comm, block_size); - send_buffer.resize(l_bytes + g_bytes); - init_comm_helper(&(send_buffer[0]), rowptr.data(), col_indices.data(), - values.data(), global_par_comm->send_data, global_par_comm->key, - global_par_comm->mpi_comm, b_rows, b_cols); - } - - init_comm_helper(&(send_buffer[g_bytes]), rowptr.data(), col_indices.data(), - values.data(), local_L_par_comm->send_data, local_L_par_comm->key, - local_L_par_comm->mpi_comm, b_rows, b_cols); -} - - -void TAPComm::init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows, const int b_cols, const bool has_vals) -{ - int block_size = b_rows * b_cols; - int l_bytes = local_L_par_comm->send_data->get_msg_size(rowptr.data(), - values.data(), local_L_par_comm->mpi_comm, block_size); - int g_bytes; - - if (local_S_par_comm) - { - BSRMatrix* S_mat = (BSRMatrix*) 
local_S_par_comm->communicate(rowptr, col_indices, values, - b_rows, b_cols, has_vals); - g_bytes = global_par_comm->send_data->get_msg_size(S_mat->idx1.data(), - S_mat->block_vals.data(), global_par_comm->mpi_comm, block_size); - send_buffer.resize(l_bytes + g_bytes); - - init_comm_helper(&(send_buffer[0]), S_mat->idx1.data(), - S_mat->idx2.data(), S_mat->vals.data(), global_par_comm->send_data, - global_par_comm->key, global_par_comm->mpi_comm, b_rows, b_cols); - delete S_mat; - } - else - { - g_bytes = global_par_comm->send_data->get_msg_size(rowptr.data(), - values.data(), global_par_comm->mpi_comm, block_size); - send_buffer.resize(l_bytes + g_bytes); - init_comm_helper(&(send_buffer[0]), rowptr.data(), col_indices.data(), - values.data(), global_par_comm->send_data, global_par_comm->key, - global_par_comm->mpi_comm, b_rows, b_cols); - } - - init_comm_helper(&(send_buffer[g_bytes]), rowptr.data(), col_indices.data(), - values.data(), local_L_par_comm->send_data, local_L_par_comm->key, - local_L_par_comm->mpi_comm, b_rows, b_cols); -} - -CSRMatrix* TAPComm::complete_mat_comm(const int b_rows, const int b_cols, const bool has_vals) -{ - CSRMatrix* G_mat = global_par_comm->complete_mat_comm(b_rows, b_cols, has_vals); - CSRMatrix* L_mat = local_L_par_comm->complete_mat_comm(b_rows, b_cols, has_vals); - - CSRMatrix* R_mat; - CSRMatrix* recv_mat; - if (b_rows > 1 || b_cols > 1) - { - BSRMatrix* G_mat_bsr = (BSRMatrix*) G_mat; - R_mat = local_R_par_comm->communicate(G_mat_bsr->idx1, G_mat_bsr->idx2, - G_mat_bsr->block_vals, b_rows, b_cols, has_vals); - - BSRMatrix* R_mat_bsr = (BSRMatrix*) R_mat; - BSRMatrix* L_mat_bsr = (BSRMatrix*) L_mat; - - // Create recv_mat (combination of L_mat and R_mat) - recv_mat = combine_recvs(L_mat_bsr, R_mat_bsr, - L_mat_bsr->block_vals, R_mat_bsr->block_vals, b_rows, b_cols, - (NonContigData*) local_L_par_comm->recv_data, - (NonContigData*) local_R_par_comm->recv_data, - get_buffer()); - } - else - { - R_mat = 
local_R_par_comm->communicate(G_mat->idx1, G_mat->idx2, - G_mat->vals, b_rows, b_cols, has_vals); - - // Create recv_mat (combination of L_mat and R_mat) - recv_mat = combine_recvs(L_mat, R_mat, - L_mat->vals, R_mat->vals, b_rows, b_cols, - (NonContigData*) local_L_par_comm->recv_data, - (NonContigData*) local_R_par_comm->recv_data, - get_buffer()); - } - delete G_mat; - delete R_mat; - delete L_mat; - - return recv_mat; -} - - -CSRMatrix* TAPComm::communicate_T(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int n_result_rows, const int b_rows, const int b_cols, const bool has_vals) -{ - std::vector send_buffer; - init_mat_comm_T(send_buffer, rowptr, col_indices, values, b_rows, b_cols, has_vals); - return complete_mat_comm_T(n_result_rows, b_rows, b_cols, has_vals); -} - -CSRMatrix* TAPComm::communicate_T(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int n_result_rows, const int b_rows, const int b_cols, const bool has_vals) -{ - std::vector send_buffer; - init_mat_comm_T(send_buffer, rowptr, col_indices, values, b_rows, b_cols, has_vals); - return complete_mat_comm_T(n_result_rows, b_rows, b_cols, has_vals); -} -void TAPComm::init_mat_comm_T(std::vector& send_buffer, const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows, const int b_cols, const bool has_vals) -{ - int block_size = b_rows * b_cols; - - // Transpose communication with local_R_par_comm - CSRMatrix* R_mat = communication_helper(rowptr.data(), col_indices.data(), - values.data(), local_R_par_comm->recv_data, - local_R_par_comm->send_data, local_R_par_comm->key, - local_R_par_comm->mpi_comm, b_rows, b_cols, has_vals); - local_R_par_comm->key++; - - // Calculate size of send_buffer for global and local_L - int l_bytes = local_L_par_comm->recv_data->get_msg_size(rowptr.data(), - values.data(), local_L_par_comm->mpi_comm, block_size); - int g_bytes = 
global_par_comm->recv_data->get_msg_size(R_mat->idx1.data(), - R_mat->vals.data(), global_par_comm->mpi_comm, block_size); - send_buffer.resize(l_bytes + g_bytes); - - // Initialize global_par_comm - init_comm_helper(&(send_buffer[0]), R_mat->idx1.data(), R_mat->idx2.data(), - R_mat->vals.data(), global_par_comm->recv_data, global_par_comm->key, - global_par_comm->mpi_comm, b_rows, b_cols); - delete R_mat; - - // Initialize local_L_par_comm - init_comm_helper(&(send_buffer[g_bytes]), rowptr.data(), col_indices.data(), - values.data(), local_L_par_comm->recv_data, - local_L_par_comm->key, local_L_par_comm->mpi_comm, - b_rows, b_cols); -} -void TAPComm::init_mat_comm_T(std::vector& send_buffer, const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows, const int b_cols, const bool has_vals) -{ - int block_size = b_rows * b_cols; - - // Transpose communication with local_R_par_comm - BSRMatrix* R_mat = (BSRMatrix*) communication_helper(rowptr.data(), col_indices.data(), - values.data(), local_R_par_comm->recv_data, - local_R_par_comm->send_data, local_R_par_comm->key, - local_R_par_comm->mpi_comm, b_rows, b_cols, has_vals); - local_R_par_comm->key++; - - // Calculate size of send_buffer for global and local_L - int l_bytes = local_L_par_comm->recv_data->get_msg_size(rowptr.data(), - values.data(), local_L_par_comm->mpi_comm, block_size); - int g_bytes = global_par_comm->recv_data->get_msg_size(R_mat->idx1.data(), - R_mat->block_vals.data(), global_par_comm->mpi_comm, block_size); - send_buffer.resize(l_bytes + g_bytes); - - // Initialize global_par_comm - init_comm_helper(&(send_buffer[0]), R_mat->idx1.data(), R_mat->idx2.data(), - R_mat->block_vals.data(), global_par_comm->recv_data, global_par_comm->key, - global_par_comm->mpi_comm, b_rows, b_cols); - delete R_mat; - - // Initialize local_L_par_comm - init_comm_helper(&(send_buffer[g_bytes]), rowptr.data(), col_indices.data(), - values.data(), 
local_L_par_comm->recv_data, - local_L_par_comm->key, local_L_par_comm->mpi_comm, - b_rows, b_cols); - -} -CSRMatrix* TAPComm::complete_mat_comm_T(const int n_result_rows, const int b_rows, const int b_cols, const bool has_vals) -{ - CSRMatrix* G_mat = complete_comm_helper(global_par_comm->recv_data, - global_par_comm->send_data, global_par_comm->key, - global_par_comm->mpi_comm, b_rows, b_cols, has_vals); - global_par_comm->key++; - - - CSRMatrix* L_mat = complete_comm_helper(local_L_par_comm->recv_data, - local_L_par_comm->send_data, local_L_par_comm->key, - local_L_par_comm->mpi_comm, b_rows, b_cols, has_vals); - local_L_par_comm->key++; - - - CSRMatrix* final_mat; - CSRMatrix* recv_mat; - ParComm* final_comm; - if (b_rows > 1 || b_cols > 1) - { - BSRMatrix* L_mat_bsr = (BSRMatrix*) L_mat; - if (local_S_par_comm) - { - BSRMatrix* G_mat_bsr = (BSRMatrix*) G_mat; - final_mat = communication_helper(G_mat_bsr->idx1.data(), G_mat_bsr->idx2.data(), - G_mat_bsr->block_vals.data(), local_S_par_comm->recv_data, - local_S_par_comm->send_data, local_S_par_comm->key, - local_S_par_comm->mpi_comm, b_rows, b_cols, has_vals); - local_S_par_comm->key++; - delete G_mat; - final_comm = local_S_par_comm; - } - else - { - final_mat = G_mat; - final_comm = global_par_comm; - } - BSRMatrix* final_mat_bsr = (BSRMatrix*) final_mat; - - recv_mat = combine_recvs_T(L_mat_bsr, final_mat_bsr, - local_L_par_comm->send_data, final_comm->send_data, - L_mat_bsr->vals, final_mat_bsr->vals, n_result_rows, b_rows, b_cols); - } - else - { - if (local_S_par_comm) - { - final_mat = communication_helper(G_mat->idx1.data(), G_mat->idx2.data(), - G_mat->vals.data(), local_S_par_comm->recv_data, local_S_par_comm->send_data, - local_S_par_comm->key, local_S_par_comm->mpi_comm, b_rows, b_cols, has_vals); - local_S_par_comm->key++; - delete G_mat; - final_comm = local_S_par_comm; - } - else - { - final_mat = G_mat; - final_comm = global_par_comm; - } - - recv_mat = combine_recvs_T(L_mat, final_mat, - 
local_L_par_comm->send_data, final_comm->send_data, - L_mat->vals, final_mat->vals, n_result_rows, b_rows, b_cols); - } - - - - - delete L_mat; - delete final_mat; - - return recv_mat; -} - - - - - - -// Helper Methods -// Create matrix (either CSR or BSR) -template<> std::vector& create_mat(int n, int m, int b_n, int b_m, - CSRMatrix** mat_ptr) -{ - CSRMatrix* recv_mat = new CSRMatrix(n, m); - *mat_ptr = recv_mat; - return recv_mat->vals; -} -template<> std::vector& create_mat(int n, int m, int b_n, int b_m, - CSRMatrix** mat_ptr) -{ - BSRMatrix* recv_mat = new BSRMatrix(n, m, b_n, b_m); - *mat_ptr = recv_mat; - return recv_mat->block_vals; -} - -template // double* or double** -CSRMatrix* communication_helper(const int* rowptr, - const int* col_indices, const T& values, - CommData* send_comm, CommData* recv_comm, int key, RAPtor_MPI_Comm mpi_comm, - const int b_rows, const int b_cols, const bool has_vals) -{ - std::vector send_buffer; - int s = send_comm->get_msg_size(rowptr, values, mpi_comm, b_rows * b_cols); - send_buffer.resize(s); - init_comm_helper(send_buffer.data(), rowptr, col_indices, values, send_comm, - key, mpi_comm, b_rows, b_cols); - return complete_comm_helper(send_comm, recv_comm, key, mpi_comm, - b_rows, b_cols, has_vals); -} -template // double* or double** -void init_comm_helper(char* send_buffer, const int* rowptr, - const int* col_indices, const T& values, - CommData* send_comm, int key, RAPtor_MPI_Comm mpi_comm, - const int b_rows, const int b_cols) -{ - int block_size = b_rows * b_cols; - if (profile) mat_t -= RAPtor_MPI_Wtime(); - send_comm->send(send_buffer, rowptr, col_indices, values, - key, mpi_comm, block_size); - if (profile) mat_t += RAPtor_MPI_Wtime(); -} -CSRMatrix* complete_comm_helper(CommData* send_comm, CommData* recv_comm, int key, - RAPtor_MPI_Comm mpi_comm, const int b_rows, const int b_cols, const bool has_vals) -{ - CSRMatrix* recv_mat; - - // Form recv_mat - int block_size = b_rows * b_cols; - if (b_rows > 1 || b_cols > 
1) - recv_mat = new BSRMatrix(recv_comm->size_msgs, -1, b_rows, b_cols); - else - recv_mat = new CSRMatrix(recv_comm->size_msgs, -1); - - // Recv contents of recv_mat - if (profile) mat_t -= RAPtor_MPI_Wtime(); - recv_comm->recv(recv_mat, key, mpi_comm, block_size, has_vals); - if (send_comm->num_msgs) - RAPtor_MPI_Waitall(send_comm->num_msgs, send_comm->requests.data(), - RAPtor_MPI_STATUSES_IGNORE); - if (profile) mat_t += RAPtor_MPI_Wtime(); - return recv_mat; -} - - - -template -CSRMatrix* transpose_recv(CSRMatrix* recv_mat_T, std::vector& T_vals, - NonContigData* send_data, int n) -{ - int idx, ptr; - int start, end; - - CSRMatrix* recv_mat; - std::vector& vals = create_mat(n, -1, recv_mat_T->b_rows, - recv_mat_T->b_cols, &recv_mat); - - if (n == 0) return recv_mat; - - std::vector row_sizes(n, 0); - for (int i = 0; i < send_data->size_msgs; i++) - { - idx = send_data->indices[i]; - start = recv_mat_T->idx1[i]; - end = recv_mat_T->idx1[i+1]; - row_sizes[idx] += end - start; - } - recv_mat->idx1[0] = 0; - for (int i = 0; i < n; i++) - { - recv_mat->idx1[i+1] = recv_mat->idx1[i] + row_sizes[i]; - row_sizes[i] = 0; - } - recv_mat->nnz = recv_mat->idx1[n]; - if (recv_mat->nnz) - { - recv_mat->idx2.resize(recv_mat->nnz); - if (T_vals.size()) - vals.resize(recv_mat->nnz); - } - for (int i = 0; i < send_data->size_msgs; i++) - { - idx = send_data->indices[i]; - start = recv_mat_T->idx1[i]; - end = recv_mat_T->idx1[i+1]; - for (int j = start; j < end; j++) - { - ptr = recv_mat->idx1[idx] + row_sizes[idx]++; - recv_mat->idx2[ptr] = recv_mat_T->idx2[j]; - if (recv_mat_T->vals.size()) - vals[ptr] = T_vals[j]; - } - } - return recv_mat; -} - -template -CSRMatrix* combine_recvs(CSRMatrix* L_mat, CSRMatrix* R_mat, - std::vector& L_vals, std::vector& R_vals, - const int b_rows, const int b_cols, - NonContigData* local_L_recv, NonContigData* local_R_recv, - std::vector& row_sizes) -{ - int row; - int start, end; - - CSRMatrix* recv_mat; - std::vector& vals = 
create_mat(L_mat->n_rows + R_mat->n_rows, -1, b_rows, b_cols, - &recv_mat); - recv_mat->nnz = L_mat->nnz + R_mat->nnz; - int ptr; - if (recv_mat->nnz) - { - recv_mat->idx2.resize(recv_mat->nnz); - if (L_vals.size() || R_vals.size()) - vals.resize(recv_mat->nnz); - } - - for (int i = 0; i < R_mat->n_rows; i++) - { - start = R_mat->idx1[i]; - end = R_mat->idx1[i+1]; - row = local_R_recv->indices[i]; - row_sizes[row] = end - start; - } - for (int i = 0; i < L_mat->n_rows; i++) - { - start = L_mat->idx1[i]; - end = L_mat->idx1[i+1]; - row = local_L_recv->indices[i]; - row_sizes[row] = end - start; - } - recv_mat->idx1[0] = 0; - for (int i = 0; i < recv_mat->n_rows; i++) - { - recv_mat->idx1[i+1] = recv_mat->idx1[i] + row_sizes[i]; - row_sizes[i] = 0; - } - for (int i = 0; i < R_mat->n_rows; i++) - { - start = R_mat->idx1[i]; - end = R_mat->idx1[i+1]; - row = local_R_recv->indices[i]; - for (int j = start; j < end; j++) - { - ptr = recv_mat->idx1[row] + row_sizes[row]++; - recv_mat->idx2[ptr] = R_mat->idx2[j]; - if (vals.size()) - vals[ptr] = R_mat->copy_val(R_vals[j]); - } - } - for (int i = 0; i < L_mat->n_rows; i++) - { - start = L_mat->idx1[i]; - end = L_mat->idx1[i+1]; - row = local_L_recv->indices[i]; - for (int j = start; j < end; j++) - { - ptr = recv_mat->idx1[row] + row_sizes[row]++; - recv_mat->idx2[ptr] = L_mat->idx2[j]; - if (vals.size()) - vals[ptr] = L_mat->copy_val(L_vals[j]); - } - } - - return recv_mat; -} - -template -CSRMatrix* combine_recvs_T(CSRMatrix* L_mat, CSRMatrix* final_mat, - NonContigData* local_L_send, NonContigData* final_send, - std::vector& L_vals, std::vector& final_vals, - int n, int b_rows, int b_cols) -{ - int row_start, row_end, row_size; - int row, idx; - - CSRMatrix* recv_mat; - std::vector& vals = create_mat(n, -1, b_rows, b_cols, - &recv_mat); - - std::vector row_sizes(n, 0); - int nnz = L_mat->nnz + final_mat->nnz; - if (nnz) - { - recv_mat->idx2.resize(nnz); - if (L_vals.size() || final_vals.size()) - vals.resize(nnz); - } - 
for (int i = 0; i < final_send->size_msgs; i++) - { - row = final_send->indices[i]; - row_size = final_mat->idx1[i+1] - final_mat->idx1[i]; - row_sizes[row] += row_size; - } - for (int i = 0; i < local_L_send->size_msgs; i++) - { - row = local_L_send->indices[i]; - row_size = L_mat->idx1[i+1] - L_mat->idx1[i]; - row_sizes[row] += row_size; - } - recv_mat->idx1[0] = 0; - for (int i = 0; i < n; i++) - { - recv_mat->idx1[i+1] = recv_mat->idx1[i] + row_sizes[i]; - row_sizes[i] = 0; - } - for (int i = 0; i < final_send->size_msgs; i++) - { - row = final_send->indices[i]; - row_start = final_mat->idx1[i]; - row_end = final_mat->idx1[i+1]; - for (int j = row_start; j < row_end; j++) - { - idx = recv_mat->idx1[row] + row_sizes[row]++; - recv_mat->idx2[idx] = final_mat->idx2[j]; - if (final_vals.size()) - vals[idx] = final_vals[j]; - } - } - for (int i = 0; i < local_L_send->size_msgs; i++) - { - row = local_L_send->indices[i]; - row_start = L_mat->idx1[i]; - row_end = L_mat->idx1[i+1]; - for (int j = row_start; j < row_end; j++) - { - idx = recv_mat->idx1[row] + row_sizes[row]++; - recv_mat->idx2[idx] = L_mat->idx2[j]; - if (L_vals.size()) - vals[idx] = L_vals[j]; - } - } - recv_mat->nnz = recv_mat->idx2.size(); - recv_mat->sort(); - - return recv_mat; -} - - - diff --git a/raptor/core/comm_pkg.cpp b/raptor/core/comm_pkg.cpp deleted file mode 100644 index 5f27de8f..00000000 --- a/raptor/core/comm_pkg.cpp +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "comm_pkg.hpp" -#include "par_matrix.hpp" -#include "utilities.hpp" - -namespace raptor -{ - template<> - std::vector& CommPkg::get_buffer() - { - return get_double_buffer(); - } - template<> - std::vector& CommPkg::get_buffer() - { - return get_int_buffer(); - } - - template<> - std::vector& CommPkg::communicate(const double* values, - const int block_size) - { - init_double_comm(values, block_size); - 
return complete_double_comm(block_size); - } - template<> - std::vector& CommPkg::communicate(const int* values, - const int block_size) - { - init_int_comm(values, block_size); - return complete_int_comm(block_size); - } - - template<> - void CommPkg::init_comm(const double* values, - const int block_size) - { - init_double_comm(values, block_size); - } - template<> - void CommPkg::init_comm(const int* values, const int block_size) - { - init_int_comm(values, block_size); - } - - template<> - std::vector& CommPkg::complete_comm(const int block_size) - { - return complete_double_comm(block_size); - } - template<> - std::vector& CommPkg::complete_comm(const int block_size) - { - return complete_int_comm(block_size); - } - - template<> - void CommPkg::communicate_T(const double* values, - std::vector& result, - const int block_size, - std::function result_func, - std::function init_result_func, - double init_result_func_val) - { - init_double_comm_T(values, block_size, init_result_func, init_result_func_val); - complete_double_comm_T(result, block_size, result_func, init_result_func, init_result_func_val); - } - template<> - void CommPkg::communicate_T(const double* values, - std::vector& result, - const int block_size, - std::function result_func, - std::function init_result_func, - double init_result_func_val) - { - init_double_comm_T(values, block_size, init_result_func, init_result_func_val); - complete_double_comm_T(result, block_size, result_func, init_result_func, init_result_func_val); - } - template<> - void CommPkg::communicate_T(const int* values, - std::vector& result, - const int block_size, - std::function result_func, - std::function init_result_func, - int init_result_func_val) - { - init_int_comm_T(values, block_size, init_result_func, init_result_func_val); - complete_int_comm_T(result, block_size, result_func, init_result_func, init_result_func_val); - } - template<> - void CommPkg::communicate_T(const int* values, - std::vector& result, - const 
int block_size, - std::function result_func, - std::function init_result_func, - int init_result_func_val) - { - init_int_comm_T(values, block_size, init_result_func, init_result_func_val); - complete_int_comm_T(result, block_size, result_func, init_result_func, init_result_func_val); - } - template<> - void CommPkg::communicate_T(const double* values, - const int block_size, - std::function init_result_func, - double init_result_func_val) - { - init_double_comm_T(values, block_size, init_result_func, init_result_func_val); - complete_double_comm_T(block_size, init_result_func, init_result_func_val); - } - template<> - void CommPkg::communicate_T(const int* values, - const int block_size, - std::function init_result_func, - int init_result_func_val) - { - init_int_comm_T(values, block_size, init_result_func, init_result_func_val); - complete_int_comm_T(block_size, init_result_func, init_result_func_val); - } - - template<> - void CommPkg::init_comm_T(const double* values, - const int block_size, - std::function init_result_func, - double init_result_func_val) - { - init_double_comm_T(values, block_size, init_result_func, init_result_func_val); - } - template<> - void CommPkg::init_comm_T(const int* values, - const int block_size, - std::function init_result_func, - int init_result_func_val) - { - init_int_comm_T(values, block_size, init_result_func, init_result_func_val); - } - - template<> - void CommPkg::complete_comm_T(span result, - const int block_size, - std::function result_func, - std::function init_result_func, - double init_result_func_val) - { - complete_double_comm_T(result, block_size, result_func, init_result_func, init_result_func_val); - } - template<> - void CommPkg::complete_comm_T(span result, - const int block_size, - std::function result_func, - std::function init_result_func, - double init_result_func_val) - { - complete_double_comm_T(result, block_size, result_func, init_result_func, init_result_func_val); - } - template<> - void 
CommPkg::complete_comm_T(span result, - const int block_size, - std::function result_func, - std::function init_result_func, - int init_result_func_val) - { - complete_int_comm_T(result, block_size, result_func, init_result_func, init_result_func_val); - } - template<> - void CommPkg::complete_comm_T(span result, - const int block_size, - std::function result_func, - std::function init_result_func, - int init_result_func_val) - { - complete_int_comm_T(result, block_size, result_func, init_result_func, init_result_func_val); - } - template<> - void CommPkg::complete_comm_T(const int block_size, - std::function init_result_func, - double init_result_func_val) - { - complete_double_comm_T(block_size, init_result_func, init_result_func_val); - } - template<> - void CommPkg::complete_comm_T(const int block_size, - std::function init_result_func, - int init_result_func_val) - { - complete_int_comm_T(block_size, init_result_func, init_result_func_val); - } -} - - -using namespace raptor; - -std::vector& CommPkg::communicate(ParVector& v, const int block_size) -{ - init_double_comm(v.local.data(), block_size); - return complete_double_comm(block_size); -} - -void CommPkg::init_comm(ParVector& v, const int block_size) -{ - init_double_comm(v.local.data(), block_size); -} diff --git a/raptor/core/comm_pkg.hpp b/raptor/core/comm_pkg.hpp deleted file mode 100644 index e7d75458..00000000 --- a/raptor/core/comm_pkg.hpp +++ /dev/null @@ -1,1841 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#ifndef RAPTOR_CORE_PARCOMM_HPP -#define RAPTOR_CORE_PARCOMM_HPP - -#include -#include "comm_data.hpp" -#include "matrix.hpp" -#include "partition.hpp" -#include "par_vector.hpp" - -#define STANDARD_PPN 4 -#define STANDARD_PROC_LAYOUT 1 - -/************************************************************** - ***** CommPkg Class: - ************************************************************** - ***** This class 
constructs a parallel communicator, containing - ***** which messages must be sent/recieved for matrix operations - ***** - ***** Methods - ***** ------- - ***** communicate(data_t* values) - ***** Communicates values to processes, based on underlying - ***** communication package - ***** form_col_to_proc(...) - ***** Maps each column in off_proc_column_map to process - ***** on which corresponding values are stored - **************************************************************/ -namespace raptor -{ - class ParCSRMatrix; - class ParBSRMatrix; - - class CommPkg - { - public: - CommPkg(Partition* partition) - { - topology = partition->topology; - topology->num_shared++; - num_shared = 0; - } - - CommPkg(Topology* _topology) - { - topology = _topology; - topology->num_shared++; - num_shared = 0; - } - - virtual ~CommPkg() - { - if (topology) - { - if (topology->num_shared) - { - topology->num_shared--; - } - else - { - delete topology; - } - } - } - - void delete_comm() - { - if (num_shared == 0) - delete this; - else num_shared--; - } - - // Matrix Communication - // TODO -- Block transpose communication - // -- Should b_rows / b_cols be switched? 
- virtual CSRMatrix* communicate(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) = 0; - virtual CSRMatrix* communicate(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) = 0; - virtual void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) = 0; - virtual void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows = 1, const int b_cols = 1, const bool has_vals = true) = 0; - virtual CSRMatrix* complete_mat_comm(const int b_rows = 1, const int b_cols = 1, - const bool has_vals = true) = 0; - - virtual CSRMatrix* communicate_T(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int n_result_rows, const int b_rows = 1, const int b_cols = 1, - const bool has_vals = true) = 0; - virtual CSRMatrix* communicate_T(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int n_result_rows, const int b_rows = 1, const int b_cols = 1, - const bool has_vals = true) = 0; - virtual void init_mat_comm_T(std::vector& send_buffer, - const std::vector& rowptr, const std::vector& col_indices, - const std::vector& values, const int b_rows = 1, - const int b_cols = 1, const bool has_vals = true) = 0; - virtual void init_mat_comm_T(std::vector& send_buffer, - const std::vector& rowptr, const std::vector& col_indices, - const std::vector& values, const int b_rows = 1, - const int b_cols = 1, const bool has_vals = true) = 0; - virtual CSRMatrix* complete_mat_comm_T(const int n_result_rows, - const int b_rows = 1, const int b_cols = 1, - const bool 
has_vals = true) = 0; - - std::vector& get_vals(CSRMatrix* A) - { - return A->vals; - } - std::vector get_vals(BSRMatrix* A) - { - return A->block_vals; - } - - CSRMatrix* communicate_sparsity(ParCSRMatrix* A) - { - return communicate(A, false); - } - - CSRMatrix* communicate(ParCSRMatrix* A, const bool has_vals = true); - CSRMatrix* communicate(ParBSRMatrix* A, const bool has_vals = true); - void init_par_mat_comm(ParCSRMatrix* A, std::vector& send_buffer, - const bool has_vals = true); - void init_par_mat_comm(ParBSRMatrix* A, std::vector& send_buffer, - const bool has_vals = true); - - CSRMatrix* communicate(CSRMatrix* A, const int has_vals = true) - { - return communicate(A->idx1, A->idx2, get_vals(A), A->b_rows, A->b_cols, has_vals); - } - CSRMatrix* communicate_T(CSRMatrix* A, const int has_vals = true) - { - return communicate_T(A->idx1, A->idx2, get_vals(A), A->n_rows, A->b_rows, - A->b_cols, has_vals); - } - - // Vector Communication - std::vector& communicate(ParVector& v, const int block_size = 1); - void init_comm(ParVector& v, const int block_size = 1); - - // Standard Communication - template - std::vector& communicate(const std::vector& values, const int block_size = 1) - { - return communicate(values.data(), block_size); - } - template - void init_comm(const std::vector& values, const int block_size = 1) - { - init_comm(values.data(), block_size); - } - template void init_comm(const T* values, const int block_size = 1); - template std::vector& complete_comm(const int block_size = 1); - template std::vector& communicate(const T* values, const int block_size = 1); - virtual void init_double_comm(const double* values, const int block_size) = 0; - virtual void init_int_comm(const int* values, const int block_size) = 0; - virtual std::vector& complete_double_comm(const int block_size) = 0; - virtual std::vector& complete_int_comm(const int block_size) = 0; - - // Transpose Communication - template - void communicate_T(const std::vector& values, 
std::vector& result, - const int block_size = 1, - std::function result_func = &sum_func, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - communicate_T(values.data(), result, block_size, result_func, - init_result_func, init_result_func_val); - } - template - void communicate_T(const std::vector& values, - const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - communicate_T(values.data(), block_size, init_result_func, - init_result_func_val); - } - template - void init_comm_T(const std::vector& values, - const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - init_comm_T(values.data(), block_size, init_result_func, init_result_func_val); - } - template void init_comm_T(const T* values, - const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0); - template void complete_comm_T(span result, - const int block_size = 1, - std::function result_func = &sum_func, - std::function init_result_func = &sum_func, - T init_result_func_val = 0); - template void complete_comm_T( - const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0); - template void communicate_T(const T* values, - std::vector& result, const int block_size = 1, - std::function result_func = &sum_func, - std::function init_result_func = &sum_func, - T init_result_func_val = 0); - template void communicate_T(const T* values, - const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0); - virtual void init_double_comm_T(const double* values, - const int block_size, - std::function init_result_func = - &sum_func, - double init_result_func_val = 0) = 0; - virtual void init_int_comm_T(const int* values, - const int block_size, - std::function init_result_func = &sum_func, - int init_result_func_val = 0) = 0; - virtual void 
complete_double_comm_T(span result, - const int block_size, - std::function result_func = &sum_func, - std::function init_result_func = - &sum_func, double init_result_func_val = 0) = 0; - virtual void complete_double_comm_T(span result, - const int block_size, - std::function result_func = &sum_func, - std::function init_result_func = - &sum_func, double init_result_func_val = 0) = 0; - virtual void complete_int_comm_T(span result, - const int block_size, - std::function result_func = &sum_func, - std::function init_result_func = &sum_func, - int init_result_func_val = 0) = 0; - virtual void complete_int_comm_T(span result, - const int block_size, - std::function result_func = &sum_func, - std::function init_result_func = &sum_func, - int init_result_func_val = 0) = 0; - virtual void complete_double_comm_T(const int block_size, - std::function init_result_func = - &sum_func, double init_result_func_val = 0) = 0; - virtual void complete_int_comm_T(const int block_size, - std::function init_result_func = &sum_func, - int init_result_func_val = 0) = 0; - - // Helper methods - template std::vector& get_buffer(); - virtual std::vector& get_double_buffer() = 0; - virtual std::vector& get_int_buffer() = 0; - - // Class Variables - Topology* topology; - std::vector buffer; - std::vector int_buffer; - int num_shared; - }; - - - /************************************************************** - ***** ParComm Class - ************************************************************** - ***** This class constructs a standard parallel communicator: - ***** which messages must be sent/recieved for matrix operations - ***** - ***** Attributes - ***** ------------- - ***** num_sends : index_t - ***** Number of messages this process must send during - ***** matrix operations - ***** num_recvs : index_t - ***** Number of messages this process will recv during - ***** matrix operations - ***** size_sends : index_t - ***** Total number of elements this process sends in all - ***** messages 
- ***** size_recvs : index_t - ***** Total number of elements this process recvs from - ***** all messages - ***** send_procs : std::vector - ***** Distant processes messages are to be sent to - ***** send_row_starts : std::vector - ***** Pointer to first position in send_row_indices - ***** that a given process will send. - ***** send_row_indices : std::vector - ***** The indices of values that must be sent to each - ***** process in send_procs - ***** recv_procs : std::vector - ***** Distant processes messages are to be recvd from - ***** recv_col_starts : std::vector - ***** Pointer to first column recvd from each process - ***** in recv_procs - ***** col_to_proc : std::vector - ***** Maps each local column in the off-diagonal block - ***** to the process that holds corresponding data - **************************************************************/ - class ParComm : public CommPkg - { - public: - /************************************************************** - ***** ParComm Class Constructor - ************************************************************** - ***** Initializes an empty ParComm, setting send and recv - ***** sizes to 0 - ***** - ***** Parameters - ***** ------------- - ***** _key : int (optional) - ***** Tag to be used in RAPtor_MPI Communication (default 0) - **************************************************************/ - ParComm(Partition* partition, int _key = 0, - RAPtor_MPI_Comm _comm = RAPtor_MPI_COMM_WORLD, - CommData* r_data = NULL) : CommPkg(partition) - { - mpi_comm = _comm; - key = _key; - send_data = new NonContigData(); - if (r_data) - recv_data = r_data; - else - recv_data = new ContigData(); - } - - ParComm(Topology* topo, int _key = 0, - RAPtor_MPI_Comm _comm = RAPtor_MPI_COMM_WORLD, - CommData* r_data = NULL) : CommPkg(topo) - { - mpi_comm = _comm; - key = _key; - send_data = new NonContigData(); - if (r_data) - recv_data = r_data; - else - recv_data = new ContigData(); - } - - 
/************************************************************** - ***** ParComm Class Constructor - ************************************************************** - ***** Initializes a ParComm object based on the off_proc Matrix - ***** - ***** Parameters - ***** ------------- - ***** off_proc_column_map : std::vector& - ***** Maps local off_proc columns indices to global - ***** _key : int (optional) - ***** Tag to be used in RAPtor_MPI Communication (default 9999) - **************************************************************/ - ParComm(Partition* partition, - const std::vector& off_proc_column_map, - int _key = 9999, - RAPtor_MPI_Comm comm = RAPtor_MPI_COMM_WORLD, - CommData* r_data = NULL) : CommPkg(partition) - { - mpi_comm = comm; - std::vector off_proc_col_to_proc(off_proc_column_map.size()); - partition->form_col_to_proc(off_proc_column_map, off_proc_col_to_proc); - init_par_comm(off_proc_column_map, off_proc_col_to_proc, _key, comm, r_data); - for (int i = 0; i < send_data->size_msgs; i++) - { - send_data->indices[i] -= partition->first_local_col; - } - } - - ParComm(Partition* partition, - const std::vector& off_proc_column_map, - const std::vector& on_proc_column_map, - int _key = 9999, - RAPtor_MPI_Comm comm = RAPtor_MPI_COMM_WORLD, - CommData* r_data = NULL) : CommPkg(partition) - { - mpi_comm = comm; - int idx; - int ctr = 0; - std::vector part_col_to_new; - std::vector off_proc_col_to_proc(off_proc_column_map.size()); - partition->form_col_to_proc(off_proc_column_map, off_proc_col_to_proc); - - init_par_comm(off_proc_column_map, off_proc_col_to_proc, _key, comm, r_data); - for (int i = 0; i < send_data->size_msgs; i++) - { - send_data->indices[i] -= partition->first_local_col; - } - - if (partition->local_num_cols) - { - part_col_to_new.resize(partition->local_num_cols, -1); - } - for (std::vector::const_iterator it = on_proc_column_map.begin(); - it != on_proc_column_map.end(); ++it) - { - part_col_to_new[*it - partition->first_local_col] = ctr++; 
- } - - for (int i = 0; i < send_data->size_msgs; i++) - { - idx = send_data->indices[i]; - send_data->indices[i] = part_col_to_new[idx]; - assert(part_col_to_new[idx] >= 0); - } - - } - - ParComm(Topology* _topology, - const std::vector& off_proc_column_map, - const std::vector& off_proc_col_to_proc, - const std::vector& local_row_map, - int _key = 9999, - RAPtor_MPI_Comm comm = RAPtor_MPI_COMM_WORLD, - CommData* r_data = NULL) : CommPkg(_topology) - { - mpi_comm = comm; - init_par_comm(off_proc_column_map, off_proc_col_to_proc, - _key, comm, r_data); - std::map global_to_local; - for (int i = 0; i < (int)local_row_map.size(); i++) - { - global_to_local[local_row_map[i]] = i; - } - for (int i = 0; i < send_data->size_msgs; i++) - { - send_data->indices[i] = global_to_local[send_data->indices[i]]; - } - - } - - void init_par_comm(const std::vector& off_proc_column_map, - const std::vector& off_proc_col_to_proc, - int _key, RAPtor_MPI_Comm comm, - CommData* r_data = NULL) - { - // Get RAPtor_MPI Information - int rank, num_procs; - RAPtor_MPI_Comm_rank(comm, &rank); - RAPtor_MPI_Comm_size(comm, &num_procs); - - // Initialize class variables - key = _key; - - send_data = new NonContigData(); - - if (r_data) - recv_data = r_data; - else - recv_data = new ContigData(); - - // Declare communication variables - int proc, prev_proc; - int tag = 12345; // TODO -- switch this to key? - int off_proc_num_cols = off_proc_column_map.size(); - - std::vector tmp_send_buffer; - - - // Determine processes columns are received from, - // and adds corresponding messages to recv data. 
- // Assumes columns are partitioned across processes - // in contiguous blocks, and are sorted - if (off_proc_num_cols) - { - prev_proc = off_proc_col_to_proc[0]; - int prev_idx = 0; - for (int i = 1; i < off_proc_num_cols; i++) - { - proc = off_proc_col_to_proc[i]; - if (proc != prev_proc) - { - recv_data->add_msg(prev_proc, i - prev_idx); - prev_proc = proc; - prev_idx = i; - } - } - recv_data->add_msg(prev_proc, off_proc_num_cols - prev_idx); - recv_data->finalize(); - } - - // For each process I recv from, send the global column indices - // for which I must recv corresponding rows - std::vector recv_sizes(num_procs, 0); - for (int i = 0; i < recv_data->num_msgs; i++) - recv_sizes[recv_data->procs[i]] = - recv_data->indptr[i+1] - recv_data->indptr[i]; - RAPtor_MPI_Allreduce(RAPtor_MPI_IN_PLACE, recv_sizes.data(), num_procs, RAPtor_MPI_INT, - RAPtor_MPI_SUM, RAPtor_MPI_COMM_WORLD); - if (profile) vec_t -= RAPtor_MPI_Wtime(); - recv_data->send(off_proc_column_map.data(), tag, comm); - send_data->probe(recv_sizes[rank], tag, comm); - recv_data->waitall(); - if (profile) vec_t += RAPtor_MPI_Wtime(); - } - - ParComm(ParComm* comm) : CommPkg(comm->topology) - { - mpi_comm = comm->mpi_comm; - send_data = comm->send_data->copy(); - recv_data = comm->recv_data->copy(); - key = comm->key; - } - - ParComm(ParComm* comm, const std::vector& off_proc_col_to_new) - : CommPkg(comm->topology) - { - mpi_comm = comm->mpi_comm; - - if (comm == NULL) - { - key = 0; - return; - } - key = comm->key; - - init_off_proc_new(comm, off_proc_col_to_new); - } - - ParComm(ParComm* comm, const std::vector& on_proc_col_to_new, - const std::vector& off_proc_col_to_new) - : CommPkg(comm->topology) - { - mpi_comm = comm->mpi_comm; - int idx, new_idx; - - if (comm == NULL) - { - key = 0; - return; - } - key = comm->key; - - init_off_proc_new(comm, off_proc_col_to_new); - - for (int i = 0; i < send_data->size_msgs; i++) - { - idx = send_data->indices[i]; - new_idx = on_proc_col_to_new[idx]; - if 
(new_idx != -1) - { - send_data->indices[i] = new_idx; - } - } - } - - - void init_off_proc_new(ParComm* comm, const std::vector& off_proc_col_to_new) - { - bool comm_proc; - int proc, start, end; - - std::function compare_func = [](const int a, const int b) - { - if (b >= 0) return b; - else return a; - }; - comm->communicate_T(off_proc_col_to_new, 1, compare_func, -1); - - recv_data = comm->recv_data->copy(off_proc_col_to_new); - - send_data = new NonContigData(); - for (int i = 0; i < comm->send_data->num_msgs; i++) - { - comm_proc = false; - proc = comm->send_data->procs[i]; - start = comm->send_data->indptr[i]; - end = comm->send_data->indptr[i+1]; - for (int j = start; j < end; j++) - { - if (comm->send_data->int_buffer[j] != -1) - { - comm_proc = true; - send_data->indices.emplace_back(comm->send_data->indices[j]); - } - } - if (comm_proc) - { - send_data->procs.emplace_back(proc); - send_data->indptr.emplace_back(send_data->indices.size()); - } - } - send_data->num_msgs = send_data->procs.size(); - send_data->size_msgs = send_data->indices.size(); - send_data->finalize(); - - - } - - /************************************************************** - ***** ParComm Class Destructor - ************************************************************** - ***** - **************************************************************/ - ~ParComm() - { - delete send_data; - delete recv_data; - } - - // Standard Communication - void init_double_comm(const double* values, const int block_size = 1) - { - initialize(values, block_size); - } - void init_int_comm(const int* values, const int block_size = 1) - { - initialize(values); - } - std::vector& complete_double_comm(const int block_size = 1) - { - return complete(block_size); - } - std::vector& complete_int_comm(const int block_size = 1) - { - return complete(block_size); - } - template - std::vector& communicate(const std::vector& values, - const int block_size = 1) - { - return CommPkg::communicate(values.data(), block_size); 
- } - template - std::vector& communicate(const T* values, const int block_size = 1) - { - return CommPkg::communicate(values, block_size); - } - - template - void initialize(const T* values, const int block_size = 1) - { - if (profile) vec_t -= RAPtor_MPI_Wtime(); - send_data->send(values, key, mpi_comm, block_size); - recv_data->recv(key, mpi_comm, block_size); - if (profile) vec_t += RAPtor_MPI_Wtime(); - } - - template - std::vector& complete(const int block_size = 1) - { - if (profile) vec_t -= RAPtor_MPI_Wtime(); - send_data->waitall(); - recv_data->waitall(); - if (profile) vec_t += RAPtor_MPI_Wtime(); - key++; - - // Extract packed data to appropriate buffer - std::vector& buf = recv_data->get_buffer(); - - return buf; - } - - // Transpose Communication - void init_double_comm_T(const double* values, - const int block_size = 1, - std::function init_result_func = - &sum_func, - double init_result_func_val = 0) - { - initialize_T(values, block_size, init_result_func, init_result_func_val); - } - void init_int_comm_T(const int* values, - const int block_size = 1, - std::function init_result_func = - &sum_func, - int init_result_func_val = 0) - { - initialize_T(values, block_size, init_result_func, init_result_func_val); - } - void complete_double_comm_T(span result, - const int block_size = 1, - std::function result_func = &sum_func, - std::function init_result_func = - &sum_func, - double init_result_func_val = 0) - { - complete_T(result, block_size, result_func, init_result_func, init_result_func_val); - } - void complete_double_comm_T(span result, - const int block_size = 1, - std::function result_func = &sum_func, - std::function init_result_func = - &sum_func, - double init_result_func_val = 0) - { - complete_T(result, block_size, result_func, init_result_func, init_result_func_val); - } - void complete_int_comm_T(span result, - const int block_size = 1, - std::function result_func = &sum_func, - std::function init_result_func = &sum_func, - int 
init_result_func_val = 0) - { - complete_T(result, block_size, result_func, init_result_func, init_result_func_val); - } - void complete_int_comm_T(span result, - const int block_size = 1, - std::function result_func = &sum_func, - std::function init_result_func = &sum_func, - int init_result_func_val = 0) - { - complete_T(result, block_size, result_func, init_result_func, init_result_func_val); - } - void complete_double_comm_T(const int block_size = 1, - std::function init_result_func = - &sum_func, - double init_result_func_val = 0) - { - complete_T(block_size, init_result_func, init_result_func_val); - } - void complete_int_comm_T(const int block_size = 1, - std::function init_result_func = &sum_func, - int init_result_func_val = 0) - { - complete_T(block_size, init_result_func, init_result_func_val); - } - template - void communicate_T(const std::vector& values, std::vector& result, - const int block_size = 1, - std::function result_func = &sum_func, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - CommPkg::communicate_T(values.data(), result, block_size, - result_func, init_result_func, init_result_func_val); - } - template - void communicate_T(const T* values, std::vector& result, - const int block_size = 1, - std::function result_func = &sum_func, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - CommPkg::communicate_T(values, result, block_size, - result_func, init_result_func, init_result_func_val); - } - template - void communicate_T(const std::vector& values, - const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - CommPkg::communicate_T(values.data(), block_size, init_result_func, - init_result_func_val); - } - template - void communicate_T(const T* values, const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - CommPkg::communicate_T(values, block_size, init_result_func, 
init_result_func_val); - } - - template - void initialize_T(const T* values, const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - if (profile) vec_t -= RAPtor_MPI_Wtime(); - recv_data->send(values, key, mpi_comm, block_size, init_result_func, init_result_func_val); - send_data->recv(key, mpi_comm, block_size); - if (profile) vec_t += RAPtor_MPI_Wtime(); - } - - template - void complete_T(span result, - const int block_size = 1, - std::function result_func = &sum_func, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - // TODO - dont need to copy into sendbuf first - complete_T(block_size, init_result_func, init_result_func_val); - - int idx, pos; - std::vector& sendbuf = send_data->get_buffer(); - - for (int i = 0; i < send_data->size_msgs; i++) - { - idx = send_data->indices[i] * block_size; - pos = i * block_size; - for (int j = 0; j < block_size; j++) - { - result[idx + j] = result_func(result[idx + j], sendbuf[pos + j]); - } - } - } - - template - void complete_T(const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - if (profile) vec_t -= RAPtor_MPI_Wtime(); - send_data->waitall(); - recv_data->waitall(); - if (profile) vec_t += RAPtor_MPI_Wtime(); - key++; - } - - // Conditional communication - template - std::vector& conditional_comm( - const std::vector& vals, - const std::vector& states, - const std::vector& off_proc_states, - std::function compare_func, - const int block_size = 1) - { - int ctr, n_sends, n_recvs; - int tag = 325493; - bool comparison; - - if (profile) vec_t -= RAPtor_MPI_Wtime(); - send_data->send(vals.data(), tag, mpi_comm, states, compare_func, &n_sends, block_size); - recv_data->recv(tag, mpi_comm, off_proc_states, - compare_func, &ctr, &n_recvs, block_size); - - send_data->waitall(n_sends); - recv_data->waitall(n_recvs); - if (profile) vec_t += RAPtor_MPI_Wtime(); - - std::vector& recvbuf = 
recv_data->get_buffer(); - - ctr--; - for (int i = recv_data->size_msgs - 1; i >= 0; i--) - { - int idx = i * block_size; - comparison = false; - for (int j = 0; j < block_size; j++) - { - if (compare_func(off_proc_states[idx+j])) - { - comparison = true; - break; - } - } - if (comparison) - { - for (int j = block_size - 1; j >= 0; j--) - { - recvbuf[idx+j] = recvbuf[ctr--]; - } - } - else - { - for (int j = block_size - 1; j >= 0; j--) - { - recvbuf[idx+j] = 0.0; - } - } - } - - return recvbuf; - } - - template - void conditional_comm_T(const std::vector& vals, - const std::vector& states, - const std::vector& off_proc_states, - std::function compare_func, - std::vector& result, - std::function result_func, - const int block_size = 1) - { - int idx, ctr; - int n_sends, n_recvs; - int tag = 453246; - bool comparison; - - if (profile) vec_t -= RAPtor_MPI_Wtime(); - recv_data->send(vals.data(), tag, mpi_comm, off_proc_states, compare_func, - &n_sends, block_size); - send_data->recv(tag, mpi_comm, states, compare_func, &ctr, &n_recvs, block_size); - - recv_data->waitall(n_sends); - send_data->waitall(n_recvs); - if (profile) vec_t += RAPtor_MPI_Wtime(); - - std::vector& sendbuf = send_data->get_buffer(); - - ctr = 0; - for (int i = 0; i < send_data->size_msgs; i++) - { - idx = send_data->indices[i] * block_size; - comparison = false; - for (int j = 0; j < block_size; j++) - { - if (compare_func(states[idx + j])) - { - comparison = true; - break; - } - } - if (comparison) - { - for (int j = 0; j < block_size; j++) - { - result[idx + j] = result_func(result[idx + j], sendbuf[ctr++]); - } - } - } - } - - - // Matrix Communication - CSRMatrix* communicate(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - CSRMatrix* communicate(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows = 1, const int b_cols = 1, 
const bool has_vals = true); - void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - CSRMatrix* complete_mat_comm(const int b_rows = 1, const int b_cols = 1, - const bool has_vals = true); - - CSRMatrix* communicate_T(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int n_result_rows, const int b_rows = 1, const int b_cols = 1, - const bool has_vals = true); - CSRMatrix* communicate_T(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int n_result_rows, const int b_rows = 1, const int b_cols = 1, - const bool has_vals = true); - void init_mat_comm_T(std::vector& send_buffer, - const std::vector& rowptr, const std::vector& col_indices, - const std::vector& values, const int b_rows = 1, - const int b_cols = 1, const bool has_vals = true) ; - void init_mat_comm_T(std::vector& send_buffer, - const std::vector& rowptr, const std::vector& col_indices, - const std::vector& values, const int b_rows = 1, - const int b_cols = 1, const bool has_vals = true) ; - CSRMatrix* complete_mat_comm_T(const int n_result_rows, - const int b_rows = 1, const int b_cols = 1, - const bool has_vals = true) ; - - - CSRMatrix* communicate(ParCSRMatrix* A, const bool has_vals = true) - { - return CommPkg::communicate(A, has_vals); - } - CSRMatrix* communicate(ParBSRMatrix* A, const bool has_vals = true) - { - return CommPkg::communicate(A, has_vals); - } - CSRMatrix* communicate(CSRMatrix* A, const bool has_vals = true) - { - return CommPkg::communicate(A, has_vals); - } - CSRMatrix* communicate_T(CSRMatrix* A, const bool has_vals = true) - { - 
return CommPkg::communicate_T(A, has_vals); - } - - - // Vector Communication - std::vector& communicate(ParVector& v, const int block_size = 1) - { - return CommPkg::communicate(v, block_size); - } - void init_comm(ParVector& v, const int block_size = 1) - { - CommPkg::init_comm(v, block_size); - } - - // Helper Methods - std::vector& get_double_buffer() - { - return recv_data->buffer; - } - std::vector& get_int_buffer() - { - return recv_data->int_buffer; - } - - int key; - NonContigData* send_data; - CommData* recv_data; - RAPtor_MPI_Comm mpi_comm; - }; - - - - /************************************************************** - ***** TAPComm Class - ************************************************************** - ***** This class constructs a topology-aware parallel communicator: - ***** which messages must be sent/recieved for matrix operations, - ***** using topology-aware methods to limit the number and size - ***** of inter-node messages - ***** - ***** Attributes - ***** ------------- - ***** local_S_par_comm : ParComm* - ***** Parallel communication package for sending data that originates - ***** on rank to other processes local to node, before inter-node - ***** communication occurs. - ***** local_R_par_comm : ParComm* - ***** Parallel communication package for redistributing previously - ***** received values (from inter-node communication step) to - ***** processes local to rank which need said values - ***** local_L_par_comm : ParComm* - ***** Parallel communication package for communicating values - ***** that both originate and have a final destination on node - ***** (fully intra-node communication) - ***** global_par_comm : ParComm* - ***** Parallel communication package for sole inter-node step. 
- ***** buffer : Vector - ***** Combination of local_L_par_comm and local_R_par_comm - ***** recv buffers, ordered to match off_proc_column_map - ***** Partition* partition - ***** Partition, holding information about topology - **************************************************************/ - class TAPComm : public CommPkg - { - public: - - TAPComm(Partition* partition, bool form_S = true, ParComm* L_comm = NULL) : CommPkg(partition) - { - if (form_S) - { - local_S_par_comm = new ParComm(partition, 2345, partition->topology->local_comm, - new DuplicateData()); - } - else local_S_par_comm = NULL; - - local_R_par_comm = new ParComm(partition, 3456, partition->topology->local_comm, - new NonContigData()); - global_par_comm = new ParComm(partition, 5678, RAPtor_MPI_COMM_WORLD, - new DuplicateData()); - - if (L_comm) - { - local_L_par_comm = L_comm; - local_L_par_comm->num_shared++; - } - else - { - local_L_par_comm = new ParComm(partition, 4567, partition->topology->local_comm, - new NonContigData()); - } - } - - - /************************************************************** - ***** TAPComm Class Constructor - ************************************************************** - ***** Initializes a TAPComm for a matrix without contiguous - ***** row-wise partitions across processes. Instead, each - ***** process holds a random assortment of rows. 
- ***** - ***** Parameters - ***** ------------- - ***** off_proc_column_map : std::vector& - ***** Maps local off_proc columns indices to global - ***** global_num_cols : int - ***** Number of global columns in matrix - ***** local_num_cols : int - ***** Number of columns local to rank - **************************************************************/ - TAPComm(Partition* partition, - const std::vector& off_proc_column_map, - bool form_S = true, - RAPtor_MPI_Comm comm = RAPtor_MPI_COMM_WORLD) - : CommPkg(partition) - { - if (form_S) - { - init_tap_comm(partition, off_proc_column_map, comm); - } - else - { - init_tap_comm_simple(partition, off_proc_column_map, comm); - } - } - - TAPComm(Partition* partition, - const std::vector& off_proc_column_map, - const std::vector& on_proc_column_map, - bool form_S = true, - RAPtor_MPI_Comm comm = RAPtor_MPI_COMM_WORLD) - : CommPkg(partition) - { - std::vector on_proc_to_new; - int on_proc_num_cols = on_proc_column_map.size(); - if (partition->local_num_cols) - { - on_proc_to_new.resize(partition->local_num_cols); - for (int i = 0; i < on_proc_num_cols; i++) - { - on_proc_to_new[on_proc_column_map[i] - partition->first_local_col] = i; - } - } - - if (form_S) - { - init_tap_comm(partition, off_proc_column_map, comm); - - for (std::vector::iterator it = local_S_par_comm->send_data->indices.begin(); - it != local_S_par_comm->send_data->indices.end(); ++it) - { - *it = on_proc_to_new[*it]; - } - } - else - { - init_tap_comm_simple(partition, off_proc_column_map, comm); - - for (std::vector::iterator it = global_par_comm->send_data->indices.begin(); - it != global_par_comm->send_data->indices.end(); ++it) - { - *it = on_proc_to_new[*it]; - } - } - - for (std::vector::iterator it = local_L_par_comm->send_data->indices.begin(); - it != local_L_par_comm->send_data->indices.end(); ++it) - { - *it = on_proc_to_new[*it]; - } - } - - /************************************************************** - ***** TAPComm Class Constructor - 
************************************************************** - ***** Create topology-aware communication class from - ***** original communication package (which processes rank - ***** communication which, and what is sent to / recv from - ***** each process. - ***** - ***** Parameters - ***** ------------- - ***** orig_comm : ParComm* - ***** Existing standard communication package from which - ***** to form topology-aware communicator - **************************************************************/ - TAPComm(TAPComm* tap_comm) : CommPkg(tap_comm->topology) - { - if (tap_comm->local_S_par_comm) - { - local_S_par_comm = new ParComm(tap_comm->local_S_par_comm); - } - else local_S_par_comm = NULL; - - global_par_comm = new ParComm(tap_comm->global_par_comm); - local_R_par_comm = new ParComm(tap_comm->local_R_par_comm); - local_L_par_comm = new ParComm(tap_comm->local_L_par_comm); - - recv_size = tap_comm->recv_size; - if (recv_size) - { - buffer.resize(recv_size); - int_buffer.resize(recv_size); - } - } - - TAPComm(TAPComm* tap_comm, const std::vector& off_proc_col_to_new, - ParComm* local_L = NULL) : CommPkg(tap_comm->topology) - { - init_off_proc_new(tap_comm, off_proc_col_to_new, local_L); - } - - TAPComm(TAPComm* tap_comm, const std::vector& on_proc_col_to_new, - const std::vector& off_proc_col_to_new, - ParComm* local_L = NULL) : CommPkg(tap_comm->topology) - { - int idx; - - init_off_proc_new(tap_comm, off_proc_col_to_new, local_L); - - if (!local_L) - { - for (int i = 0; i < local_L_par_comm->send_data->size_msgs; i++) - { - idx = local_L_par_comm->send_data->indices[i]; - local_L_par_comm->send_data->indices[i] = on_proc_col_to_new[idx]; - } - } - - if (local_S_par_comm) - { - for (int i = 0; i < local_S_par_comm->send_data->size_msgs; i++) - { - idx = local_S_par_comm->send_data->indices[i]; - local_S_par_comm->send_data->indices[i] = on_proc_col_to_new[idx]; - } - } - else - { - for (int i = 0; i < global_par_comm->send_data->size_msgs; i++) - { - idx = 
global_par_comm->send_data->indices[i]; - global_par_comm->send_data->indices[i] = on_proc_col_to_new[idx]; - } - } - } - - - void init_off_proc_new(TAPComm* tap_comm, const std::vector& off_proc_col_to_new, - ParComm* local_L = NULL) - { - int idx, ctr; - int start, end; - - DuplicateData* global_recv = (DuplicateData*) tap_comm->global_par_comm->recv_data; - - if (local_L) - { - local_L_par_comm = local_L; - local_L_par_comm->num_shared++; - } - else - { - local_L_par_comm = new ParComm(tap_comm->local_L_par_comm, off_proc_col_to_new); - } - local_R_par_comm = new ParComm(tap_comm->local_R_par_comm, off_proc_col_to_new); - - // Create global par comm / update R send indices - std::vector& local_R_int_buffer = - tap_comm->local_R_par_comm->send_data->get_buffer(); - std::vector& global_int_buffer = - tap_comm->global_par_comm->send_data->get_buffer(); - - std::vector G_to_new(tap_comm->global_par_comm->recv_data->size_msgs, -1); - ctr = 0; - for (int i = 0; i < global_recv->size_msgs; i++) - { - start = global_recv->indptr_T[i]; - end = global_recv->indptr_T[i+1]; - for (int j = start; j < end; j++) - { - idx = global_recv->indices[j]; - if (local_R_int_buffer[idx] != -1) - { - G_to_new[i] = ctr++; - break; - } - } - } - for (std::vector::iterator it = local_R_par_comm->send_data->indices.begin(); - it != local_R_par_comm->send_data->indices.end(); ++it) - { - *it = G_to_new[*it]; - } - idx = 0; - for (std::vector::iterator it = local_R_int_buffer.begin(); - it != local_R_int_buffer.end(); ++it) - { - if (*it != -1) *it = idx++; - } - - global_par_comm = new ParComm(tap_comm->global_par_comm, - local_R_int_buffer); - - - // create local S / update global send indices - if (tap_comm->local_S_par_comm) - { - DuplicateData* local_S_recv = (DuplicateData*) tap_comm->local_S_par_comm->recv_data; - std::vector S_to_new(tap_comm->local_S_par_comm->recv_data->size_msgs, -1); - ctr = 0; - for (int i = 0; i < local_S_recv->size_msgs; i++) - { - start = 
local_S_recv->indptr_T[i]; - end = local_S_recv->indptr_T[i+1]; - for (int j = start; j < end; j++) - { - idx = local_S_recv->indices[j]; - if (global_int_buffer[idx] != -1) - { - S_to_new[i] = ctr++; - break; - } - } - } - for (std::vector::iterator it = global_par_comm->send_data->indices.begin(); - it != global_par_comm->send_data->indices.end(); ++it) - { - *it = S_to_new[*it]; - } - idx = 0; - for (std::vector::iterator it = global_int_buffer.begin(); - it != global_int_buffer.end(); ++it) - { - if (*it != -1) *it = idx++; - } - - local_S_par_comm = new ParComm(tap_comm->local_S_par_comm, - global_int_buffer); - } - else local_S_par_comm = NULL; - - // Determine size of final recvs (should be equal to - // number of off_proc cols) - recv_size = local_R_par_comm->recv_data->size_msgs + - local_L_par_comm->recv_data->size_msgs; - if (recv_size) - { - // Want a single recv buffer local_R and local_L par_comms - buffer.resize(recv_size); - int_buffer.resize(recv_size); - } - } - - /************************************************************** - ***** ParComm Class Destructor - ************************************************************** - ***** - **************************************************************/ - ~TAPComm() - { - if (global_par_comm) - global_par_comm->delete_comm(); - if (local_S_par_comm) - local_S_par_comm->delete_comm(); - if (local_R_par_comm) - local_R_par_comm->delete_comm(); - if (local_L_par_comm) - local_L_par_comm->delete_comm(); - } - - void init_tap_comm(Partition* partition, - const std::vector& off_proc_column_map, - RAPtor_MPI_Comm comm) - { - // Get RAPtor_MPI Information - int rank, num_procs; - RAPtor_MPI_Comm_rank(comm, &rank); - RAPtor_MPI_Comm_size(comm, &num_procs); - - // Initialize class variables - local_S_par_comm = new ParComm(partition, 2345, partition->topology->local_comm, - new DuplicateData()); - local_R_par_comm = new ParComm(partition, 3456, partition->topology->local_comm, - new NonContigData()); - 
local_L_par_comm = new ParComm(partition, 4567, partition->topology->local_comm, - new NonContigData()); - global_par_comm = new ParComm(partition, 5678, comm, new DuplicateData()); - - // Initialize Variables - std::vector off_proc_col_to_proc; - std::vector on_node_column_map; - std::vector on_node_col_to_proc; - std::vector off_node_column_map; - std::vector off_node_col_to_node; - std::vector on_node_to_off_proc; - std::vector off_node_to_off_proc; - std::vector recv_nodes; - std::vector orig_procs; - std::vector node_to_local_proc; - - // Find process on which vector value associated with each column is - // stored - partition->form_col_to_proc(off_proc_column_map, off_proc_col_to_proc); - - // Partition off_proc cols into on_node and off_node - split_off_proc_cols(off_proc_column_map, off_proc_col_to_proc, - on_node_column_map, on_node_col_to_proc, on_node_to_off_proc, - off_node_column_map, off_node_col_to_node, off_node_to_off_proc); - - // Gather all nodes with which any local process must communication - form_local_R_par_comm(off_node_column_map, off_node_col_to_node, - orig_procs); - - // Find global processes with which rank communications - form_global_par_comm(orig_procs); - - // Form local_S_par_comm: initial distribution of values among local - // processes, before inter-node communication - form_local_S_par_comm(orig_procs); - - // Adjust send indices (currently global vector indices) to be index - // of global vector value from previous recv - adjust_send_indices(partition->first_local_col); - - // Form local_L_par_comm: fully local communication (origin and - // destination processes both local to node) - form_local_L_par_comm(on_node_column_map, on_node_col_to_proc, - partition->first_local_col); - - // Determine size of final recvs (should be equal to - // number of off_proc cols) - update_recv(on_node_to_off_proc, off_node_to_off_proc); - } - - void init_tap_comm_simple(Partition* partition, - const std::vector& off_proc_column_map, - 
RAPtor_MPI_Comm comm) - { - // Get RAPtor_MPI Information - int rank, num_procs; - RAPtor_MPI_Comm_rank(comm, &rank); - RAPtor_MPI_Comm_size(comm, &num_procs); - - // Initialize class variables - local_S_par_comm = NULL; - local_R_par_comm = new ParComm(partition, 3456, partition->topology->local_comm, - new NonContigData()); - local_L_par_comm = new ParComm(partition, 4567, partition->topology->local_comm, - new NonContigData()); - global_par_comm = new ParComm(partition, 5678, comm, new DuplicateData()); - - // Initialize Variables - std::vector off_proc_col_to_proc; - std::vector on_node_column_map; - std::vector on_node_col_to_proc; - std::vector off_node_column_map; - std::vector off_node_col_to_proc; - std::vector on_node_to_off_proc; - std::vector off_node_to_off_proc; - - // Find process on which vector value associated with each column is - // stored - partition->form_col_to_proc(off_proc_column_map, off_proc_col_to_proc); - - // Partition off_proc cols into on_node and off_node - split_off_proc_cols(off_proc_column_map, off_proc_col_to_proc, - on_node_column_map, on_node_col_to_proc, on_node_to_off_proc, - off_node_column_map, off_node_col_to_proc, off_node_to_off_proc); - - // Form local recv communicator. Will recv from local rank - // corresponding to global rank on which data originates. E.g. if - // data is on rank r = (p, n), and my rank is s = (q, m), I will - // recv data from (p, m). - form_simple_R_par_comm(off_node_column_map, off_node_col_to_proc); - - // Form global par comm.. 
Will recv from proc on which data - // originates - form_simple_global_comm(off_node_col_to_proc); - - // Adjust send indices (currently global vector indices) to be - // index of global vector value from previous recv (only updating - // local_R to match position in global) - adjust_send_indices(partition->first_local_col); - - // Form local_L_par_comm: fully local communication (origin and - // destination processes both local to node) - form_local_L_par_comm(on_node_column_map, on_node_col_to_proc, - partition->first_local_col); - - // Determine size of final recvs (should be equal to - // number of off_proc cols) - update_recv(on_node_to_off_proc, off_node_to_off_proc); - - } - - // Helper methods for forming TAPComm: - void split_off_proc_cols(const std::vector& off_proc_column_map, - const std::vector& off_proc_col_to_proc, - std::vector& on_node_column_map, - std::vector& on_node_col_to_proc, - std::vector& on_node_to_off_proc, - std::vector& off_node_column_map, - std::vector& off_node_col_to_node, - std::vector& off_node_to_off_proc); - void form_local_R_par_comm(const std::vector& off_node_column_map, - const std::vector& off_node_col_to_node, - std::vector& orig_procs); - void form_global_par_comm(std::vector& orig_procs); - void form_local_S_par_comm(std::vector& orig_procs); - void adjust_send_indices(const int first_local_col); - void form_local_L_par_comm(const std::vector& on_node_column_map, - const std::vector& on_node_col_to_proc, - const int first_local_col); - void form_simple_R_par_comm(std::vector& off_node_column_map, - std::vector& off_node_col_to_proc); - void form_simple_global_comm(std::vector& off_node_col_to_proc); - void update_recv(const std::vector& on_node_to_off_proc, - const std::vector& off_node_to_off_proc, bool update_L = true); - - // Class Methods - void init_double_comm(const double* values, const int block_size) - { - initialize(values, block_size); - } - void init_int_comm(const int* values, const int block_size) - { - 
initialize(values, block_size); - } - std::vector& complete_double_comm(const int block_size) - { - return complete(block_size); - } - std::vector& complete_int_comm(const int block_size) - { - return complete(block_size); - } - - template - std::vector& communicate(const std::vector& values, - const int block_size = 1) - { - return CommPkg::communicate(values.data(), block_size); - } - template - std::vector& communicate(const T* values, - const int block_size = 1) - { - return CommPkg::communicate(values, block_size); - } - - template - void initialize(const T* values, const int block_size = 1) - { - // Messages with origin and final destination on node - local_L_par_comm->communicate(values, block_size); - - if (local_S_par_comm) - { - // Initial redistribution among node - std::vector& S_vals = local_S_par_comm->communicate(values, block_size); - - // Begin inter-node communication - global_par_comm->initialize(S_vals.data(), block_size); - } - else - { - global_par_comm->initialize(values, block_size); - } - } - - template - std::vector& complete(const int block_size = 1) - { - // Complete inter-node communication - std::vector& G_vals = global_par_comm->complete(block_size); - - // Redistributing recvd inter-node values - local_R_par_comm->communicate(G_vals.data(), block_size); - - std::vector& recvbuf = get_buffer(); - - std::vector& R_recvbuf = local_R_par_comm->recv_data->get_buffer(); - std::vector& L_recvbuf = local_L_par_comm->recv_data->get_buffer(); - - if ((int)recvbuf.size() < recv_size * block_size) - recvbuf.resize(recv_size * block_size); - - // Add values from L_recv and R_recv to appropriate positions in - // Vector recv - int idx, pos; - int R_recv_size = local_R_par_comm->recv_data->size_msgs; - int L_recv_size = local_L_par_comm->recv_data->size_msgs; - NonContigData* local_R_recv = (NonContigData*) local_R_par_comm->recv_data; - NonContigData* local_L_recv = (NonContigData*) local_L_par_comm->recv_data; - for (int i = 0; i < R_recv_size; 
i++) - { - pos = i * block_size; - idx = local_R_recv->indices[i] * block_size; - for (int j = 0; j < block_size; j++) - { - recvbuf[idx + j] = R_recvbuf[pos + j]; - } - } - - for (int i = 0; i < L_recv_size; i++) - { - pos = i * block_size; - idx = local_L_recv->indices[i] * block_size; - for (int j = 0; j < block_size; j++) - { - recvbuf[idx + j] = L_recvbuf[pos + j]; - } - } - - return recvbuf; - } - - - // Transpose Communication - void init_double_comm_T(const double* values, - const int block_size, - std::function init_result_func = - &sum_func, - double init_result_func_val = 0) - { - initialize_T(values, block_size, init_result_func, init_result_func_val); - } - void init_int_comm_T(const int* values, - const int block_size, - std::function init_result_func = &sum_func, - int init_result_func_val = 0) - { - initialize_T(values, block_size, init_result_func, init_result_func_val); - } - void complete_double_comm_T(span result, - const int block_size, - std::function result_func = &sum_func, - std::function init_result_func = - &sum_func, - double init_result_func_val = 0) - { - complete_T(result, block_size, result_func, init_result_func, init_result_func_val); - } - void complete_double_comm_T(span result, - const int block_size, - std::function result_func = &sum_func, - std::function init_result_func = - &sum_func, - double init_result_func_val = 0) - { - complete_T(result, block_size, result_func, init_result_func, init_result_func_val); - } - void complete_int_comm_T(span result, - const int block_size, - std::function result_func = &sum_func, - std::function init_result_func = &sum_func, - int init_result_func_val = 0) - { - complete_T(result, block_size, result_func, init_result_func, init_result_func_val); - } - void complete_int_comm_T(span result, - const int block_size, - std::function result_func = &sum_func, - std::function init_result_func = &sum_func, - int init_result_func_val = 0) - { - complete_T(result, block_size, result_func, 
init_result_func, init_result_func_val); - } - - void complete_double_comm_T(const int block_size, - std::function init_result_func = - &sum_func, - double init_result_func_val = 0) - { - complete_T(block_size, init_result_func, init_result_func_val); - } - void complete_int_comm_T(const int block_size, - std::function init_result_func = - &sum_func, - int init_result_func_val = 0) - { - complete_T(block_size, init_result_func, init_result_func_val); - } - - template - void communicate_T(const std::vector& values, std::vector& result, - const int block_size = 1, - std::function result_func = &sum_func, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - CommPkg::communicate_T(values.data(), result, block_size, result_func, init_result_func, - init_result_func_val); - } - template - void communicate_T(const T* values, std::vector& result, - const int block_size = 1, - std::function result_func = &sum_func, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - CommPkg::communicate_T(values, result, block_size, result_func, init_result_func, - init_result_func_val); - } - template - void communicate_T(const std::vector& values, - const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - CommPkg::communicate_T(values.data(), block_size, init_result_func, init_result_func_val); - } - template - void communicate_T(const T* values, const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - CommPkg::communicate_T(values, block_size, init_result_func, init_result_func_val); - } - - template - void initialize_T(const T* values, const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - // Messages with origin and final destination on node - local_L_par_comm->communicate_T(values, block_size, init_result_func, init_result_func_val); - - // Initial redistribution among node 
- local_R_par_comm->communicate_T(values, block_size, init_result_func, init_result_func_val); - - // Begin inter-node communication - std::vector& R_sendbuf = local_R_par_comm->send_data->get_buffer(); - global_par_comm->init_comm_T(R_sendbuf, block_size, init_result_func, init_result_func_val); - } - - template - void complete_T(span result, const int block_size = 1, - std::function result_func = &sum_func, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - complete_T(block_size, init_result_func, init_result_func_val); - int idx, pos; - std::vector& L_sendbuf = local_L_par_comm->send_data->get_buffer(); - - for (int i = 0; i < local_L_par_comm->send_data->size_msgs; i++) - { - idx = local_L_par_comm->send_data->indices[i] * block_size; - pos = i * block_size; - for (int j = 0; j < block_size; j++) - { - result[idx + j] = result_func(result[idx + j], L_sendbuf[pos + j]); - } - } - - if (local_S_par_comm) - { - std::vector& S_sendbuf = local_S_par_comm->send_data->get_buffer(); - for (int i = 0; i < local_S_par_comm->send_data->size_msgs; i++) - { - idx = local_S_par_comm->send_data->indices[i] * block_size; - pos = i * block_size; - for (int j = 0; j < block_size; j++) - { - result[idx + j] = result_func(result[idx + j], S_sendbuf[pos + j]); - } - } - } - else - { - std::vector& G_sendbuf = global_par_comm->send_data->get_buffer(); - for (int i = 0; i < global_par_comm->send_data->size_msgs; i++) - { - idx = global_par_comm->send_data->indices[i] * block_size; - pos = i * block_size; - for (int j = 0; j < block_size; j++) - { - result[idx + j] = result_func(result[idx + j], G_sendbuf[pos + j]); - } - } - } - } - template - void complete_T(const int block_size = 1, - std::function init_result_func = &sum_func, - T init_result_func_val = 0) - { - // Complete inter-node communication - global_par_comm->complete_comm_T(block_size, init_result_func, init_result_func_val); - - if (local_S_par_comm) - { - std::vector& G_sendbuf = 
global_par_comm->send_data->get_buffer(); - local_S_par_comm->communicate_T(G_sendbuf, block_size, init_result_func, - init_result_func_val); - } - } - - - // Matrix Communication - CSRMatrix* communicate(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - CSRMatrix* communicate(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - void init_mat_comm(std::vector& send_buffer, const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int b_rows = 1, const int b_cols = 1, const bool has_vals = true); - CSRMatrix* complete_mat_comm(const int b_rows = 1, const int b_cols = 1, - const bool has_vals = true); - - CSRMatrix* communicate_T(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int n_result_rows, const int b_rows = 1, const int b_cols = 1, - const bool has_vals = true); - CSRMatrix* communicate_T(const std::vector& rowptr, - const std::vector& col_indices, const std::vector& values, - const int n_result_rows, const int b_rows = 1, const int b_cols = 1, - const bool has_vals = true); - void init_mat_comm_T(std::vector& send_buffer, - const std::vector& rowptr, const std::vector& col_indices, - const std::vector& values, const int b_rows = 1, - const int b_cols = 1, const bool has_vals = true) ; - void init_mat_comm_T(std::vector& send_buffer, - const std::vector& rowptr, const std::vector& col_indices, - const std::vector& values, const int b_rows = 1, - const int b_cols = 1, const bool has_vals = true) ; - CSRMatrix* complete_mat_comm_T(const int 
n_result_rows, - const int b_rows = 1, const int b_cols = 1, - const bool has_vals = true); - - CSRMatrix* communicate(ParCSRMatrix* A, const bool has_vals = true) - { - return CommPkg::communicate(A, has_vals); - } - CSRMatrix* communicate(ParBSRMatrix* A, const bool has_vals = true) - { - return CommPkg::communicate(A, has_vals); - } - CSRMatrix* communicate(CSRMatrix* A, const bool has_vals = true) - { - return CommPkg::communicate(A, has_vals); - } - CSRMatrix* communicate_T(CSRMatrix* A, const bool has_vals = true) - { - return CommPkg::communicate_T(A, has_vals); - } - - // Vector Communication - std::vector& communicate(ParVector& v, - const int block_size = 1) - { - return CommPkg::communicate(v, block_size); - } - - void init_comm(ParVector& v, const int block_size = 1) - { - CommPkg::init_comm(v, block_size); - } - - // Helper Methods - std::vector& get_double_buffer() - { - return buffer; - } - std::vector& get_int_buffer() - { - return int_buffer; - } - - // Class Attributes - int recv_size; - ParComm* local_S_par_comm; - ParComm* local_R_par_comm; - ParComm* local_L_par_comm; - ParComm* global_par_comm; - }; -} -#endif diff --git a/raptor/core/matrix.cpp b/raptor/core/matrix.cpp deleted file mode 100644 index c9eb788d..00000000 --- a/raptor/core/matrix.cpp +++ /dev/null @@ -1,1438 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "matrix.hpp" -#include "utilities.hpp" - -using namespace raptor; - -/************************************************************** -***** Matrix Print -************************************************************** -***** Print the nonzeros in the matrix, as well as the row -***** and column according to each nonzero -**************************************************************/ -template -void print_helper(const COOMatrix* A, const std::vector& vals) -{ - int row, col; - - for (int i = 0; i < A->nnz; i++) - { - row = A->idx1[i]; 
- col = A->idx2[i]; - A->val_print(row, col, vals[i]); - } -} -template -void print_helper(const CSRMatrix* A, const std::vector& vals) -{ - int col, start, end; - - for (int row = 0; row < A->n_rows; row++) - { - start = A->idx1[row]; - end = A->idx1[row+1]; - for (int j = start; j < end; j++) - { - col = A->idx2[j]; - A->val_print(row, col, vals[j]); - } - } -} -template -void print_helper(const CSCMatrix* A, const std::vector& vals) -{ - int row, start, end; - - for (int col = 0; col < A->n_cols; col++) - { - start = A->idx1[col]; - end = A->idx1[col+1]; - for (int j = start; j < end; j++) - { - row = A->idx2[j]; - A->val_print(row, col, vals[j]); - } - } -} -template -void bcoo_print_helper(const BCOOMatrix* A, const std::vector& vals) -{ - int row, col; - - for (int i = 0; i < A->nnz; i++) - { - row = A->idx1[i]; - col = A->idx2[i]; - A->val_print(row, col, vals[i]); - } -} -template -void bsr_print_helper(const BSRMatrix* A, const std::vector& vals) -{ - int col, start, end; - - for (int row = 0; row < A->n_rows; row++) - { - start = A->idx1[row]; - end = A->idx1[row+1]; - for (int j = start; j < end; j++) - { - col = A->idx2[j]; - A->val_print(row, col, vals[j]); - } - } -} -template -void bsc_print_helper(const BSCMatrix* A, const std::vector& vals) -{ - int row, start, end; - - for (int col = 0; col < A->n_cols; col++) - { - start = A->idx1[col]; - end = A->idx1[col+1]; - for (int j = start; j < end; j++) - { - row = A->idx2[j]; - A->val_print(row, col, vals[j]); - } - } -} -void COOMatrix::print() -{ - print_helper(this, vals); -} -void CSRMatrix::print() -{ - print_helper(this, vals); -} -void CSCMatrix::print() -{ - print_helper(this, vals); -} -void BCOOMatrix::print() -{ - bcoo_print_helper(this, vals); -} -void BSRMatrix::print() -{ - bsr_print_helper(this, vals); -} -void BSCMatrix::print() -{ - bsc_print_helper(this, vals); -} - -/************************************************************** -***** Matrix Transpose 
-************************************************************** -***** Transpose the matrix, reversing rows and columns -***** Retain matrix type, and block structure if applicable -**************************************************************/ -COOMatrix* COOMatrix::transpose() -{ - COOMatrix* T = new COOMatrix(n_rows, n_cols, idx2, idx1, vals); - return T; -} - -BCOOMatrix* BCOOMatrix::transpose() -{ - BCOOMatrix* T = new BCOOMatrix(b_rows, b_cols, n_rows, n_cols, idx2, idx1, block_vals); - return T; -} - -CSRMatrix* CSRMatrix::transpose() -{ - CSCMatrix* T_csc = new CSCMatrix(n_rows, n_cols, idx1, idx2, vals); - CSRMatrix* T = T_csc->to_CSR(); - delete T_csc; - return T; -} - -BSRMatrix* BSRMatrix::transpose() -{ - BSCMatrix* T_bsc = new BSCMatrix(b_rows, b_cols, n_rows, n_cols, idx1, idx2, block_vals); - BSRMatrix* T = (BSRMatrix*) T_bsc->to_CSR(); - delete T_bsc; - return T; -} - -CSCMatrix* CSCMatrix::transpose() -{ - CSRMatrix* T_csr = new CSRMatrix(n_rows, n_cols, idx1, idx2, vals); - CSCMatrix* T = T_csr->to_CSC(); - delete T_csr; - return T; -} -BSCMatrix* BSCMatrix::transpose() -{ - BSRMatrix* T_bsr = new BSRMatrix(b_rows, b_cols, n_rows, n_cols, idx1, idx2, block_vals); - BSCMatrix* T = (BSCMatrix*) T_bsr->to_CSC(); - delete T_bsr; - return T; -} - - -/************************************************************** -***** Matrix Resize -************************************************************** -***** Set the matrix dimensions to those passed as parameters -***** -***** Parameters -***** ------------- -***** _nrows : int -***** Number of rows in matrix -***** _ncols : int -***** Number of cols in matrix -**************************************************************/ -void Matrix::resize(int _n_rows, int _n_cols) -{ - n_rows = _n_rows; - n_cols = _n_cols; -} - -/************************************************************** -***** Matrix Copy -************************************************************** -***** Copy matrix between any subset of 
matrix types -***** -***** Parameters -***** ------------- -***** Matrix* A : original matrix to copy (of some type) -**************************************************************/ -template -void COO_to_COO(const COOMatrix* A, COOMatrix* B, std::vector& A_vals, - std::vector& B_vals) -{ - B->n_rows = A->n_rows; - B->n_cols = A->n_cols; - B->nnz = A->nnz; - - B->idx1.clear(); - B->idx2.clear(); - B_vals.clear(); - - B->idx1.reserve(A->nnz); - B->idx2.reserve(A->nnz); - B_vals.reserve(A->nnz); - for (int i = 0; i < A->nnz; i++) - { - B->idx1.emplace_back(A->idx1[i]); - B->idx2.emplace_back(A->idx2[i]); - B_vals.emplace_back(B->copy_val(A_vals[i])); - } -} -template -void CSR_to_COO(const CSRMatrix* A, COOMatrix* B, std::vector& A_vals, - std::vector& B_vals) -{ - B->n_rows = A->n_rows; - B->n_cols = A->n_cols; - B->nnz = A->nnz; - - B->idx1.clear(); - B->idx2.clear(); - B_vals.clear(); - - B->idx1.reserve(A->nnz); - B->idx2.reserve(A->nnz); - B_vals.reserve(A->nnz); - for (int i = 0; i < A->n_rows; i++) - { - int row_start = A->idx1[i]; - int row_end = A->idx1[i+1]; - for (int j = row_start; j < row_end; j++) - { - B->idx1.emplace_back(i); - B->idx2.emplace_back(A->idx2[j]); - B_vals.emplace_back(B->copy_val(A_vals[j])); - } - } -} -template -void CSC_to_COO(const CSCMatrix* A, COOMatrix* B, std::vector& A_vals, - std::vector& B_vals) -{ - B->n_rows = A->n_rows; - B->n_cols = A->n_cols; - B->nnz = A->nnz; - - B->idx1.clear(); - B->idx2.clear(); - B_vals.clear(); - - B->idx1.reserve(A->nnz); - B->idx2.reserve(A->nnz); - B_vals.reserve(A->nnz); - for (int i = 0; i < A->n_cols; i++) - { - int col_start = A->idx1[i]; - int col_end = A->idx1[i+1]; - for (int j = col_start; j < col_end; j++) - { - B->idx1.emplace_back(A->idx2[j]); - B->idx2.emplace_back(i); - B_vals.emplace_back(B->copy_val(A_vals[j])); - } - } - -} -template -void COO_to_CSR(const COOMatrix* A, CSRMatrix* B, std::vector& A_vals, - std::vector& B_vals) -{ - B->n_rows = A->n_rows; - B->n_cols = A->n_cols; 
- B->nnz = A->nnz; - - B->idx1.resize(B->n_rows + 1); - std::fill(B->idx1.begin(), B->idx1.end(), 0); - if (B->nnz) - { - B->idx2.resize(B->nnz); - if (A->data_size()) - B_vals.resize(B->nnz); - } - - // Calculate indptr - for (int i = 0; i < B->nnz; i++) - { - int row = A->idx1[i]; - B->idx1[row+1]++; - } - for (int i = 0; i < B->n_rows; i++) - { - B->idx1[i+1] += B->idx1[i]; - } - - // Add indices and data - std::vector ctr; - if (B->n_rows) - { - ctr.resize(B->n_rows, 0); - } - for (int i = 0; i < B->nnz; i++) - { - int row = A->idx1[i]; - int col = A->idx2[i]; - int index = B->idx1[row] + ctr[row]++; - B->idx2[index] = col; - if (A->data_size()) // Checking that matrix has values (not S) - { - B_vals[index] = B->copy_val(A_vals[i]); - } - } - -} -template -void CSR_to_CSR(const CSRMatrix* A, CSRMatrix* B, std::vector& A_vals, - std::vector& B_vals) -{ - B->n_rows = A->n_rows; - B->n_cols = A->n_cols; - B->nnz = A->nnz; - - B->idx1.resize(A->n_rows + 1); - B->idx2.resize(A->nnz); - B_vals.resize(A->nnz); - - B->idx1[0] = 0; - for (int i = 0; i < A->n_rows; i++) - { - B->idx1[i+1] = A->idx1[i+1]; - int row_start = B->idx1[i]; - int row_end = B->idx1[i+1]; - for (int j = row_start; j < row_end; j++) - { - B->idx2[j] = A->idx2[j]; - B_vals[j] = B->copy_val(A_vals[j]); - } - } - -} -template -void BSR_to_CSR(const BSRMatrix* A, CSRMatrix* B, std::vector& A_vals, - std::vector& B_vals) -{ - B->n_rows = A->n_rows * A->b_rows; - B->n_cols = A->n_cols * A->b_cols; - - B->idx1.resize(B->n_rows + 1); - B->idx2.reserve(A->nnz); - B->vals.reserve(A->nnz); - - T val; - int col; - B->idx1[0] = 0; - for (int i = 0; i < A->n_rows; i++) - { - int row_start = A->idx1[i]; - int row_end = A->idx1[i+1]; - for (int br = 0; br < A->b_rows; br++) - { - for (int j = row_start; j < row_end; j++) - { - for (int bc = 0; bc < A->b_cols; bc++) - { - val = A_vals[j][br*A->b_cols + bc]; - if (fabs(val) > zero_tol) - { - col = A->idx2[j]; - B->vals.emplace_back(val); - 
B->idx2.emplace_back(col*A->b_cols + bc); - } - } - } - B->idx1[i*A->b_rows + br+1] = B->idx2.size(); - } - } - B->nnz = B->vals.size(); - -} -template -void CSC_to_CSR(const CSCMatrix* A, CSRMatrix* B, std::vector& A_vals, - std::vector& B_vals) -{ - B->n_rows = A->n_rows; - B->n_cols = A->n_cols; - B->nnz = A->nnz; - - B->idx1.clear(); - B->idx2.clear(); - B_vals.clear(); - - // Resize vectors to appropriate dimensions - B->idx1.resize(A->n_rows + 1); - B->idx2.resize(A->nnz); - if (A->data_size()) - B_vals.resize(A->nnz); - - // Create indptr, summing number times row appears in CSC - for (int i = 0; i <= A->n_rows; i++) B->idx1[i] = 0; - for (int i = 0; i < A->nnz; i++) - { - B->idx1[A->idx2[i] + 1]++; - } - for (int i = 1; i <= A->n_rows; i++) - { - B->idx1[i] += B->idx1[i-1]; - } - - // Add values to indices and data - std::vector ctr(B->n_rows, 0); - for (int i = 0; i < A->n_cols; i++) - { - int col_start = A->idx1[i]; - int col_end = A->idx1[i+1]; - for (int j = col_start; j < col_end; j++) - { - int row = A->idx2[j]; - int idx = B->idx1[row] + ctr[row]++; - B->idx2[idx] = i; - if (A->data_size()) - { - B_vals[idx] = B->copy_val(A_vals[j]); - } - } - } - -} -template -void COO_to_CSC(const COOMatrix* A, CSCMatrix* B, std::vector& A_vals, - std::vector& B_vals) -{ - B->n_rows = A->n_rows; - B->n_cols = A->n_cols; - B->nnz = A->nnz; - - B->idx1.resize(B->n_cols + 1); - std::fill(B->idx1.begin(), B->idx1.end(), 0); - if (B->nnz) - { - B->idx2.resize(B->nnz); - if (A->data_size()) - B_vals.resize(B->nnz); - } - - // Calculate indptr - for (int i = 0; i < B->nnz; i++) - { - int col = A->idx1[i]; - B->idx1[col+1]++; - } - for (int i = 0; i < B->n_cols; i++) - { - B->idx1[i+1] += B->idx1[i]; - } - - // Add indices and data - std::vector ctr; - if (B->n_cols) - { - ctr.resize(B->n_cols, 0); - } - for (int i = 0; i < B->nnz; i++) - { - int col = A->idx1[i]; - int row = A->idx2[i]; - int index = B->idx1[col] + ctr[col]++; - B->idx2[index] = row; - if (A->data_size()) 
// Checking that matrix has values (not S) - { - B_vals[index] = B->copy_val(A_vals[i]); - } - } - -} -template -void CSR_to_CSC(const CSRMatrix* A, CSCMatrix* B, std::vector& A_vals, - std::vector& B_vals) -{ - B->n_rows = A->n_rows; - B->n_cols = A->n_cols; - B->nnz = A->nnz; - - B->idx1.clear(); - B->idx2.clear(); - B_vals.clear(); - - // Resize vectors to appropriate dimensions - B->idx1.resize(A->n_cols + 1); - B->idx2.resize(A->nnz); - if (A->data_size()) - B_vals.resize(A->nnz); - - // Create indptr, summing number times row appears in CSC - for (int i = 0; i <= A->n_cols; i++) B->idx1[i] = 0; - for (int i = 0; i < A->nnz; i++) - { - B->idx1[A->idx2[i] + 1]++; - } - for (int i = 1; i <= A->n_cols; i++) - { - B->idx1[i] += B->idx1[i-1]; - } - - // Add values to indices and data - std::vector ctr(B->n_cols, 0); - for (int i = 0; i < A->n_rows; i++) - { - int row_start = A->idx1[i]; - int row_end = A->idx1[i+1]; - for (int j = row_start; j < row_end; j++) - { - int col = A->idx2[j]; - int idx = B->idx1[col] + ctr[col]++; - B->idx2[idx] = i; - if (A->data_size()) - { - B_vals[idx] = B->copy_val(A_vals[j]); - } - } - } - -} -template -void CSC_to_CSC(const CSCMatrix* A, CSCMatrix* B, std::vector& A_vals, - std::vector& B_vals) -{ - B->n_rows = A->n_rows; - B->n_cols = A->n_cols; - B->nnz = A->nnz; - - B->idx1.resize(A->n_cols + 1); - B->idx2.resize(A->nnz); - B->vals.resize(A->nnz); - - B->idx1[0] = 0; - for (int i = 0; i < A->n_cols; i++) - { - int col_start = A->idx1[i]; - int col_end = A->idx1[i+1]; - B->idx1[i+1] = col_end; - for (int j = col_start; j < col_end; j++) - { - B->idx2[j] = A->idx2[j]; - B_vals[j] = B->copy_val(A_vals[j]); - } - } -} - - -/************************************************************** -***** Matrix Sort -************************************************************** -***** Sorts the sparse matrix by row and column -**************************************************************/ -template -void sort_helper(COOMatrix* A, 
std::vector& vals) -{ - if (A->sorted || A->nnz == 0) - { - A->sorted = true; - return; - } - - vec_sort(A->idx1, A->idx2, vals); - - A->sorted = true; - A->diag_first = false; - -} - -template -void sort_helper(CSRMatrix* A, std::vector& vals) -{ - int start, end, row_size; - - if (A->sorted || A->nnz == 0) - { - A->sorted = true; - return; - } - - // Sort the columns of each row (and data accordingly) - for (int row = 0; row < A->n_rows; row++) - { - start = A->idx1[row]; - end = A->idx1[row+1]; - row_size = end - start; - if (row_size == 0) - { - continue; - } - - if (A->data_size()) - vec_sort(A->idx2, vals, start, end); - else - std::sort(A->idx2.begin() + start, A->idx2.begin() + end); - } - - A->sorted = true; - A->diag_first = false; -} - -template -void sort_helper(CSCMatrix* A, std::vector& vals) -{ - int start, end, col_size; - - if (A->sorted || A->nnz == 0) - { - A->sorted = true; - return; - } - - // Sort the columns of each col (and data accordingly) and remove - // duplicates (summing values together) - for (int col = 0; col < A->n_cols; col++) - { - start = A->idx1[col]; - end = A->idx1[col+1]; - col_size = end - start; - if (col_size == 0) - { - continue; - } - - if (A->data_size()) - vec_sort(A->idx2, vals, start, end); - else - std::sort(A->idx2.begin() + start, A->idx2.begin() + end); - } - - A->sorted = true; - A->diag_first = false; -} - -void COOMatrix::sort() -{ - sort_helper(this, vals); -} -void BCOOMatrix::sort() -{ - sort_helper(this, block_vals); -} -void CSRMatrix::sort() -{ - sort_helper(this, vals); -} -void BSRMatrix::sort() -{ - sort_helper(this, block_vals); -} -void CSCMatrix::sort() -{ - sort_helper(this, vals); -} -void BSCMatrix::sort() -{ - sort_helper(this, block_vals); -} - - -/************************************************************** -***** Matrix Move Diagonal -************************************************************** -***** Moves the diagonal element to the front of each row -***** If matrix is not sorted, 
sorts before moving -**************************************************************/ -template -void move_diag_helper(COOMatrix* A, std::vector& vals) -{ - if (A->diag_first || A->nnz == 0) - { - return; - } - - if (!A->sorted) - { - A->sort(); - } - - int row_start, prev_row; - int row, col; - - // Move diagonal entry to first in row - row_start = 0; - prev_row = 0; - for (int i = 0; i < A->nnz; i++) - { - row = A->idx1[i]; - col = A->idx2[i]; - if (row != prev_row) - { - prev_row = row; - row_start = i; - } - else if (row == col) - { - auto tmp = vals[i]; - for (int j = i; j > row_start; j--) - { - A->idx2[j] = A->idx2[j-1]; - vals[j] = vals[j-1]; - } - A->idx2[row_start] = row; - vals[row_start] = tmp; - } - } - - A->diag_first = true; -} - -template -void move_diag_helper(CSRMatrix* A, std::vector& vals) -{ - int start, end; - int col; - - if (A->diag_first || A->nnz == 0) - { - return; - } - - // Move diagonal values to beginning of each row - if (A->data_size()) - { - for (int i = 0; i < A->n_rows; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - for (int j = start; j < end; j++) - { - col = A->idx2[j]; - if (col == i) - { - auto tmp = vals[j]; - for (int k = j; k > start; k--) - { - A->idx2[k] = A->idx2[k-1]; - vals[k] = vals[k-1]; - } - A->idx2[start] = i; - vals[start] = tmp; - break; - } - } - } - } - else - { - for (int i = 0; i < A->n_rows; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - for (int j = start; j < end; j++) - { - col = A->idx2[j]; - if (col == i) - { - for (int k = j; k > start; k--) - { - A->idx2[k] = A->idx2[k-1]; - } - A->idx2[start] = i; - break; - } - } - } - } - A->diag_first = true; -} - -template -void move_diag_helper(CSCMatrix* A, std::vector& vals) -{ - int start, end; - int row; - - if (A->diag_first || A->nnz == 0) - { - return; - } - - // Move diagonal values to beginning of each row - if (A->data_size()) - { - for (int i = 0; i < A->n_cols; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - for (int j = 
start; j < end; j++) - { - row = A->idx2[j]; - if (row == i) - { - auto tmp = vals[j]; - for (int k = j; k > start; k--) - { - A->idx2[k] = A->idx2[k-1]; - vals[k] = vals[k-1]; - } - A->idx2[start] = i; - vals[start] = tmp; - break; - } - } - } - } - else - { - for (int i = 0; i < A->n_cols; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - for (int j = start; j < end; j++) - { - row = A->idx2[j]; - if (row == i) - { - for (int k = j; k > start; k--) - { - A->idx2[k] = A->idx2[k-1]; - } - A->idx2[start] = i; - break; - } - } - } - } - A->diag_first = true; -} - -void COOMatrix::move_diag() -{ - move_diag_helper(this, vals); -} -void BCOOMatrix::move_diag() -{ - move_diag_helper(this, block_vals); -} -void CSRMatrix::move_diag() -{ - move_diag_helper(this, vals); -} -void BSRMatrix::move_diag() -{ - move_diag_helper(this, block_vals); -} -void CSCMatrix::move_diag() -{ - move_diag_helper(this, vals); -} -void BSCMatrix::move_diag() -{ - move_diag_helper(this, block_vals); -} - -/************************************************************** -***** Matrix Removes Duplicates -************************************************************** -***** Goes through each sorted row, and removes duplicate -***** entries, summing associated values -**************************************************************/ -template -void remove_duplicates_helper(COOMatrix* A, std::vector& vals) -{ - if (!A->sorted) - { - A->sort(); - A->diag_first = false; - } - - int prev_row, prev_col, ctr; - int row, col; - - // Remove duplicates (sum together) - prev_row = A->idx1[0]; - prev_col = A->idx2[0]; - ctr = 1; - for (int i = 1; i < A->nnz; i++) - { - row = A->idx1[i]; - col = A->idx2[i]; - if (row == prev_row && col == prev_col) - { - A->append_vals(&vals[ctr - 1], &vals[i]); - } - else - { - if (ctr != i) - { - A->idx1[ctr] = row; - A->idx2[ctr] = col; - vals[ctr] = vals[i]; - } - ctr++; - - prev_row = row; - prev_col = col; - } - } - - A->nnz = ctr; -} - -template -void 
remove_duplicates_helper(CSRMatrix* A, std::vector& vals) -{ - int orig_start, orig_end; - int new_start; - int col, prev_col; - int ctr, row_size; - - if (!A->sorted) - { - A->sort(); - A->diag_first = false; - } - - orig_start = A->idx1[0]; - for (int row = 0; row < A->n_rows; row++) - { - new_start = A->idx1[row]; - orig_end = A->idx1[row+1]; - row_size = orig_end - orig_start; - if (row_size == 0) - { - orig_start = orig_end; - A->idx1[row+1] = A->idx1[row]; - continue; - } - - // Remove Duplicates - col = A->idx2[orig_start]; - A->idx2[new_start] = col; - vals[new_start] = vals[orig_start]; - prev_col = col; - ctr = 1; - for (int j = orig_start + 1; j < orig_end; j++) - { - col = A->idx2[j]; - if (col == prev_col) - { - A->append_vals(&vals[ctr - 1 + new_start], &vals[j]); - } - else - { - if (A->abs_val(vals[ctr - 1 + new_start]) < zero_tol) - { - ctr--; - } - - A->idx2[ctr + new_start] = col; - vals[ctr + new_start] = vals[j]; - ctr++; - prev_col = col; - } - } - if (A->abs_val(vals[ctr - 1 + new_start]) < zero_tol) - { - ctr--; - } - - orig_start = orig_end; - A->idx1[row+1] = A->idx1[row] + ctr; - } - A->nnz = A->idx1[A->n_rows]; - A->idx2.resize(A->nnz); - vals.resize(A->nnz); -} - -template -void remove_duplicates_helper(CSCMatrix* A, std::vector& vals) -{ - int orig_start, orig_end; - int new_start; - int row, prev_row; - int ctr, col_size; - - if (!A->sorted) - { - A->sort(); - A->diag_first = false; - } - - orig_start = A->idx1[0]; - for (int col = 0; col < A->n_cols; col++) - { - new_start = A->idx1[col]; - orig_end = A->idx1[col+1]; - col_size = orig_end - orig_start; - if (col_size == 0) - { - orig_start = orig_end; - A->idx1[col+1] = A->idx1[col]; - continue; - } - - // Remove Duplicates - row = A->idx2[orig_start]; - A->idx2[new_start] = row; - vals[new_start] = vals[orig_start]; - prev_row = row; - ctr = 1; - for (int j = orig_start + 1; j < orig_end; j++) - { - row = A->idx2[j]; - if (row == prev_row) - { - A->append_vals(&vals[ctr - 1 + 
new_start], &vals[j]); - } - else - { - if (A->abs_val(vals[ctr - 1 + new_start]) < zero_tol) - { - ctr--; - } - - A->idx2[ctr + new_start] = row; - vals[ctr + new_start] = vals[j]; - ctr++; - prev_row = row; - } - } - if (A->abs_val(vals[ctr - 1 + new_start]) < zero_tol) - { - ctr--; - } - - orig_start = orig_end; - A->idx1[col+1] = A->idx1[col] + ctr; - } - A->nnz = A->idx1[A->n_cols]; - A->idx2.resize(A->nnz); - vals.resize(A->nnz); -} - -void COOMatrix::remove_duplicates() -{ - remove_duplicates_helper(this, vals); -} -void BCOOMatrix::remove_duplicates() -{ - remove_duplicates_helper(this, block_vals); -} -void CSRMatrix::remove_duplicates() -{ - remove_duplicates_helper(this, vals); -} -void BSRMatrix::remove_duplicates() -{ - remove_duplicates_helper(this, block_vals); -} -void CSCMatrix::remove_duplicates() -{ - remove_duplicates_helper(this, vals); -} -void BSCMatrix::remove_duplicates() -{ - remove_duplicates_helper(this, block_vals); -} - -/************************************************************** -***** Matrix Convert -************************************************************** -***** Convert from one type of matrix to another -***** No copies if matrix type remains the same -***** If blocked matrix, converts to block matrix -**************************************************************/ -COOMatrix* COOMatrix::to_COO() -{ - return this; -} -COOMatrix* COOMatrix::to_BCOO() -{ - return this->to_COO(); -} -COOMatrix* BCOOMatrix::to_COO() -{ - return this->to_BCOO(); -} -COOMatrix* BCOOMatrix::to_BCOO() -{ - return this; -} -CSRMatrix* COOMatrix::to_CSR() -{ - CSRMatrix* A = new CSRMatrix(); - COO_to_CSR(this, A, vals, A->vals); - return A; -} -CSRMatrix* COOMatrix::to_BSR() -{ - return this->to_CSR(); -} -CSRMatrix* BCOOMatrix::to_CSR() -{ - return this->to_BSR(); -} -CSRMatrix* BCOOMatrix::to_BSR() -{ - BSRMatrix* A = new BSRMatrix(); - A->b_rows = b_rows; - A->b_cols = b_cols; - A->b_size = b_size; - COO_to_CSR(this, A, block_vals, 
A->block_vals); - return A; -} -CSCMatrix* COOMatrix::to_CSC() -{ - CSCMatrix* A = new CSCMatrix(); - COO_to_CSC(this, A, vals, A->vals); - return A; -} -CSCMatrix* COOMatrix::to_BSC() -{ - return this->to_CSC(); -} -CSCMatrix* BCOOMatrix::to_CSC() -{ - return this->to_BSC(); -} -CSCMatrix* BCOOMatrix::to_BSC() -{ - BSCMatrix* A = new BSCMatrix(); - A->b_rows = b_rows; - A->b_cols = b_cols; - A->b_size = b_size; - COO_to_CSC(this, A, block_vals, A->block_vals); - return A; -} - -COOMatrix* CSRMatrix::to_COO() -{ - COOMatrix* A = new COOMatrix(); - CSR_to_COO(this, A, vals, A->vals); - return A; -} -COOMatrix* CSRMatrix::to_BCOO() -{ - return this->to_COO(); -} -COOMatrix* BSRMatrix::to_COO() -{ - return this->to_BCOO(); -} -COOMatrix* BSRMatrix::to_BCOO() -{ - BCOOMatrix* A = new BCOOMatrix(); - A->b_rows = b_rows; - A->b_cols = b_cols; - A->b_size = b_size; - CSR_to_COO(this, A, block_vals, A->block_vals); - return A; -} -CSRMatrix* CSRMatrix::to_CSR() -{ - return this; -} -CSRMatrix* CSRMatrix::to_BSR() -{ - return this->to_CSR(); -} -CSRMatrix* BSRMatrix::to_CSR() -{ - CSRMatrix* A = new CSRMatrix(); - BSR_to_CSR(this, A, block_vals, A->vals); - return A; -} -CSRMatrix* BSRMatrix::to_BSR() -{ - return this; -} -CSCMatrix* CSRMatrix::to_CSC() -{ - CSCMatrix* A = new CSCMatrix(); - CSR_to_CSC(this, A, vals, A->vals); - return A; -} -CSCMatrix* CSRMatrix::to_BSC() -{ - return this->to_CSC(); -} -CSCMatrix* BSRMatrix::to_CSC() -{ - return this->to_BSC(); -} -CSCMatrix* BSRMatrix::to_BSC() -{ - BSCMatrix* A = new BSCMatrix(); - A->b_rows = b_rows; - A->b_cols = b_cols; - A->b_size = b_size; - CSR_to_CSC(this, A, block_vals, A->block_vals); - return A; -} - -COOMatrix* CSCMatrix::to_COO() -{ - COOMatrix* A = new COOMatrix(); - CSC_to_COO(this, A, vals, A->vals); - return A; -} -COOMatrix* CSCMatrix::to_BCOO() -{ - return this->to_COO(); -} -COOMatrix* BSCMatrix::to_COO() -{ - return this->to_BCOO(); -} -COOMatrix* BSCMatrix::to_BCOO() -{ - BCOOMatrix* A = new 
BCOOMatrix(); - A->b_rows = b_rows; - A->b_cols = b_cols; - A->b_size = b_size; - CSC_to_COO(this, A, block_vals, A->block_vals); - return A; -} -CSRMatrix* CSCMatrix::to_CSR() -{ - CSRMatrix* A = new CSRMatrix(); - CSC_to_CSR(this, A, vals, A->vals); - return A; -} -CSRMatrix* CSCMatrix::to_BSR() -{ - return this->to_CSR(); -} -CSRMatrix* BSCMatrix::to_CSR() -{ - return this->to_BSR(); -} -CSRMatrix* BSCMatrix::to_BSR() -{ - BSRMatrix* A = new BSRMatrix(); - A->b_rows = b_rows; - A->b_cols = b_cols; - A->b_size = b_size; - CSC_to_CSR(this, A, block_vals, A->block_vals); - return A; -} -CSCMatrix* CSCMatrix::to_CSC() -{ - return this; -} -CSCMatrix* CSCMatrix::to_BSC() -{ - return this->to_CSC(); -} -CSCMatrix* BSCMatrix::to_CSC() -{ - return this->to_BSC(); -} -CSCMatrix* BSCMatrix::to_BSC() -{ - return this; -} - -/************************************************************** -***** Matrix Copy -**************************************************************/ -COOMatrix* COOMatrix::copy() -{ - COOMatrix* A = new COOMatrix(); - COO_to_COO(this, A, vals, A->vals); - return A; -} -BCOOMatrix* BCOOMatrix::copy() -{ - BCOOMatrix* A = new BCOOMatrix(); - A->b_rows = b_rows; - A->b_cols = b_cols; - A->b_size = b_size; - COO_to_COO(this, A, block_vals, A->block_vals); - return A; -} -CSRMatrix* CSRMatrix::copy() -{ - CSRMatrix* A = new CSRMatrix(); - CSR_to_CSR(this, A, vals, A->vals); - return A; -} -BSRMatrix* BSRMatrix::copy() -{ - BSRMatrix* A = new BSRMatrix(); - A->b_rows = b_rows; - A->b_cols = b_cols; - A->b_size = b_size; - CSR_to_CSR(this, A, block_vals, A->block_vals); - return A; -} -CSCMatrix* CSCMatrix::copy() -{ - CSCMatrix* A = new CSCMatrix(); - CSC_to_CSC(this, A, vals, A->vals); - return A; -} -BSCMatrix* BSCMatrix::copy() -{ - BSCMatrix* A = new BSCMatrix(); - A->b_rows = b_rows; - A->b_cols = b_cols; - A->b_size = b_size; - CSC_to_CSC(this, A, block_vals, A->block_vals); - return A; -} - 
-/************************************************************** -***** Matrix Block Removal -************************************************************** -***** Determines which columns were kept after removing -***** block structure from matrices -**************************************************************/ -void COOMatrix::block_removal_col_check(bool* col_check) -{ - for (int i = 0; i < n_cols * b_cols; i++) - { - col_check[i] = true; - } -} -void BCOOMatrix::block_removal_col_check(bool* col_check) -{ - for (int i = 0; i < n_cols * b_cols; i++) - { - col_check[i] = false; - } - - int idx, first_col; - double* block_val; - for (int i = 0; i < nnz; i++) - { - block_val = block_vals[i]; - for (int row = 0; row < b_rows; row++) - { - idx = row * b_cols; - first_col = idx2[i]*b_cols; - for (int col = 0; col < b_cols; col++) - { - if(fabs(block_val[idx + col]) > zero_tol) - { - col_check[first_col + col] = true; - } - } - } - } -} - -void CSCMatrix::block_removal_col_check(bool* col_check) -{ - for (int i = 0; i < n_cols * b_cols; i++) - { - col_check[i] = true; - } -} -void BSCMatrix::block_removal_col_check(bool* col_check) -{ - for (int i = 0; i < n_cols * b_cols; i++) - { - col_check[i] = false; - } - - int start, end, idx; - double* block_val; - for (int j = 0; j < n_cols; j++) - { - start = idx1[j]; - end = idx1[j+1]; - for (int row = 0; row < b_rows; row++) - { - idx = row * b_cols; - for (int i = start; i < end; i++) - { - block_val = block_vals[i]; - for (int col = 0; col < b_cols; col++) - { - if(fabs(block_val[idx + col]) > zero_tol) - { - col_check[j + col] = true; - } - } - } - } - } -} - -void CSRMatrix::block_removal_col_check(bool* col_check) -{ - for (int i = 0; i < n_cols * b_cols; i++) - { - col_check[i] = true; - } -} -void BSRMatrix::block_removal_col_check(bool* col_check) -{ - for (int i = 0; i < n_cols * b_cols; i++) - { - col_check[i] = false; - } - - int start, end, idx, first_col; - double* block_val; - for (int i = 0; i < n_rows; 
i++) - { - start = idx1[i]; - end = idx1[i+1]; - for (int row = 0; row < b_rows; row++) - { - idx = row * b_cols; - for (int j = start; j < end; j++) - { - first_col = idx2[j]*b_cols; - block_val = block_vals[j]; - for (int col = 0; col < b_cols; col++) - { - if(fabs(block_val[idx + col]) > zero_tol) - { - col_check[first_col + col] = true; - } - } - } - } - } -} diff --git a/raptor/core/matrix.hpp b/raptor/core/matrix.hpp deleted file mode 100644 index 40df47d7..00000000 --- a/raptor/core/matrix.hpp +++ /dev/null @@ -1,1353 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#ifndef RAPTOR_CORE_MATRIX_HPP -#define RAPTOR_CORE_MATRIX_HPP - -#include "types.hpp" -#include "vector.hpp" - -/************************************************************** - ***** Matrix Base Class - ************************************************************** - ***** This class constructs a sparse matrix, supporting simple linear - ***** algebra operations. - ***** - ***** Attributes - ***** ------------- - ***** n_rows : int - ***** Number of rows - ***** n_cols : int - ***** Number of columns - ***** nnz : int - ***** Number of nonzeros - ***** idx1 : std::vector - ***** List of position indices, specific to type of matrix - ***** idx2 : std::vector - ***** List of position indices, specific to type of matrix - ***** vals : std::vector - ***** List of values in matrix - ***** - ***** Methods - ***** ------- - ***** resize(int n_rows, int n_cols) - ***** Resizes dimension of matrix to passed parameters - ***** mult(Vector* x, Vector* b) - ***** Sparse matrix-vector multiplication b = A * x - ***** residual(Vector* x, Vector* b, Vector* r) - ***** Calculates the residual r = b - A * x - ***** - ***** Virtual Methods - ***** ------- - ***** format() - ***** Returns the format of the sparse matrix (COO, CSR, CSC) - ***** sort() - ***** Sorts the matrix by position. 
Whether row-wise or - ***** column-wise depends on matrix format. - ***** add_value(int row, int col, double val) - ***** Adds val to position (row, col) - ***** TODO -- make sure this is working for CSR/CSC - **************************************************************/ -namespace raptor -{ - // Forward Declaration of classes so objects can be used - class COOMatrix; - class CSRMatrix; - class CSCMatrix; - class Matrix - { - - public: - - /************************************************************** - ***** Matrix Base Class Constructor - ************************************************************** - ***** Sets matrix dimensions, and sets nnz to 0 - ***** - ***** Parameters - ***** ------------- - ***** _nrows : int - ***** Number of rows in matrix - ***** _ncols : int - ***** Number of cols in matrix - **************************************************************/ - Matrix(int _nrows, int _ncols) - { - n_rows = _nrows; - n_cols = _ncols; - nnz = 0; - sorted = false; - diag_first = false; - b_rows = 1; - b_cols = 1; - b_size = 1; - } - - /************************************************************** - ***** Matrix Base Class Constructor - ************************************************************** - ***** Sets matrix dimensions and nnz based on Matrix* A - ***** - ***** Parameters - ***** ------------- - ***** A : Matrix* - ***** Matrix to be copied - **************************************************************/ - Matrix() - { - n_rows = 0; - n_cols = 0; - nnz = 0; - sorted = false; - diag_first = false; - b_rows = 1; - b_cols = 1; - b_size = 1; - } - - virtual ~Matrix(){} - - template - void init_from_lists(std::vector& _idx1, std::vector& _idx2, - std::vector& data) - { - nnz = data.size(); - resize_data(nnz); - - T* val_list = (T*) get_data(); - - std::copy(_idx1.begin(), _idx1.end(), std::back_inserter(idx1)); - std::copy(_idx2.begin(), _idx2.end(), std::back_inserter(idx2)); - - for (int i = 0; i < nnz; i++) - { - val_list[i] = copy_val(data[i]); 
- } - } - - // Virtual Methods - virtual format_t format() = 0; - virtual void sort() = 0; - virtual void move_diag() = 0; - virtual void remove_duplicates() = 0; - virtual void print() = 0; - virtual CSRMatrix* to_CSR() = 0; - virtual CSCMatrix* to_CSC() = 0; - virtual COOMatrix* to_COO() = 0; - virtual CSRMatrix* to_BSR() = 0; - virtual CSCMatrix* to_BSC() = 0; - virtual COOMatrix* to_BCOO() = 0; - virtual void block_removal_col_check(bool* col_check) = 0; - virtual Matrix* copy() = 0; - - virtual void spmv(const double* x, double* b) const = 0; - virtual void spmv_append(const double* x, double* b) const = 0; - virtual void spmv_append_T(const double* x, double* b) const = 0; - virtual void spmv_append_neg(const double* x, double* b) const = 0; - virtual void spmv_append_neg_T(const double* x, double* b) const = 0; - virtual void spmv_residual(const double* x, const double* b, double* r) const = 0; - - virtual CSRMatrix* spgemm(CSRMatrix* B, int* B_to_C = NULL) = 0; - virtual CSRMatrix* spgemm_T(CSCMatrix* A, int* C_map = NULL) = 0; - virtual Matrix* transpose() = 0; - - double* get_values(Vector& x) const - { - return x.values.data(); - } - template T* get_values(std::vector& x) const - { - return x.data(); - } - template T* get_values(T* x) const - { - return x; - } - - // Method for printing the value at one position - // (either single or block value) - void val_print(int row, int col, double val) const - { - printf("A[%d][%d] = %e\n", row, col, val); - } - void val_print(int row, int col, double* val) const - { - for (int i = 0; i < b_rows; i++) - { - for (int j = 0; j < b_cols; j++) - { - printf("A[%d][%d], BlockPos[%d][%d] = %e\n", row, col, i, j, val[i*b_cols+j]); - } - } - } - - double copy_val(double val) const - { - return val; - } - double* copy_val(double* val) const - { - double* new_val = new double[b_size]; - for (int i = 0; i < b_size; i++) - { - new_val[i] = val[i]; - } - return new_val; - } - - // Method for finding the absolute value of - // 
either a single or block value - double abs_val(double val) const - { - return fabs(val); - } - double abs_val(double* val) const - { - double sum = 0; - for (int i = 0; i < b_size; i++) - { - sum += fabs(val[i]); - } - return sum; - } - - // Methods for appending two values - // (either single or block values) - void append_vals(double* val, double* addl_val) const - { - *val += *addl_val; - } - void append_vals(double** val, double** addl_val) const - { - for (int i = 0; i < b_size; i++) - { - *val[i] += *addl_val[i]; - } - delete[] *addl_val; - } - void mult_vals(double val, double addl_val, double* sum, - int nr, int nc0, int n_inner) const - { - *sum += (val * addl_val); - } - void mult_vals(double* val, double* addl_val, double** sum, - int nr, int nc, int n_inner) const - { - for (int i = 0; i < nr; i++) // Go through b_rows of A - { - for (int j = 0; j < nc; j++) // Go through b_cols of B - { - double s = 0; - for (int k = 0; k < n_inner; k++) // Go through b_cols of A (== b_rows of B) - { - s += val[i*n_inner + k] * addl_val[k*n_inner + j]; - } - (*sum)[i*nc + j] += s; - } - } - } - void mult_T_vals(double val, double addl_val, double* sum, - int nr, int nc, int n_inner) const - { - *sum += (val * addl_val); - } - void mult_T_vals(double* val, double* addl_val, double** sum, - int nr, int nc, int n_inner) const - { - for (int i = 0; i < nr; i++) // Go through b_rows of A - { - for (int j = 0; j < nc; j++) // Go through b_cols of B - { - double s = 0; - for (int k = 0; k < n_inner; k++) // Go through b_cols of A (== b_rows of B) - { - s += val[k*n_inner + i] * addl_val[k*n_inner + j]; - } - (*sum)[i*nc + j] += s; - } - } - } - - - void append(int _idx1, int _idx2, double* b, const double* x, const double val) const - { - b[_idx1] += val*x[_idx2]; - } - void append_T(int _idx1, int _idx2, double* b, const double* x, const double val) const - { - b[_idx2] += val*x[_idx1]; - } - void append_neg(int _idx1, int _idx2, double* b, const double* x, const double 
val) const - { - b[_idx1] -= val*x[_idx2]; - } - void append_neg_T(int _idx1, int _idx2, double* b, const double* x, const double val) const - { - b[_idx2] -= val*x[_idx1]; - } - void append(int _idx1, int _idx2, double* b, const double* x, const double* val) const - { - int first_row = _idx1*b_rows; - int first_col = _idx2*b_cols; - for (int row = 0; row < b_rows; row++) - { - for (int col = 0; col < b_cols; col++) - { - b[first_row + row] += (val[row * b_cols + col] * x[first_col + col]); - } - } - } - void append_T(int _idx1, int _idx2, double* b, const double* x, const double* val) const - { - int first_row = _idx1*b_rows; - int first_col = _idx2*b_cols; - - for (int row = 0; row < b_rows; row++) - { - double x_val = x[first_row + row]; - for (int col = 0; col < b_cols; col++) - { - b[first_col + col] += (val[row * b_cols + col] * x_val); - } - } - } - void append_neg(int _idx1, int _idx2, double* b, const double* x, const double* val) const - { - int first_row = _idx1*b_rows; - int first_col = _idx2*b_cols; - for (int row = 0; row < b_rows; row++) - { - for (int col = 0; col < b_cols; col++) - { - b[first_row + row] -= (val[row * b_cols + col] * x[first_col + col]); - } - } - } - void append_neg_T(int _idx1, int _idx2, double* b, const double* x, const double* val) const - { - int first_row = _idx1*b_rows; - int first_col = _idx1*b_cols; - for (int row = 0; row < b_rows; row++) - { - for (int col = 0; col < b_cols; col++) - { - b[first_col + col] -= (val[row * b_cols + col] * x[first_row + row]); - } - } - } - - template void mult(T& x, U& b) const - { - spmv(get_values(x), get_values(b)); - } - template void mult_T(T& x, U& b) const - { - int cols = n_cols * b_cols; - for (int i = 0; i < cols; i++) - b[i] = 0.0; - spmv_append_T(get_values(x), get_values(b)); - } - template void mult_append(T& x, U& b) const - { - spmv_append(get_values(x), get_values(b)); - } - template void mult_append_T(T& x, U& b) const - { - spmv_append_T(get_values(x), get_values(b)); - 
} - template void mult_append_neg(T& x, U& b) const - { - spmv_append_neg(get_values(x), get_values(b)); - } - template void mult_append_neg_T(T& x, U& b) const - { - spmv_append_neg_T(get_values(x), get_values(b)); - } - template void residual(T& x, U& b, V& r) const - { - spmv_residual(get_values(x), get_values(b), get_values(r)); - } - - CSRMatrix* mult(CSRMatrix* B, int* B_to_C = NULL); - CSRMatrix* mult(CSCMatrix* B, int* B_to_C = NULL); - CSRMatrix* mult(COOMatrix* B, int* B_to_C = NULL); - CSRMatrix* mult_T(CSCMatrix* A, int* C_map = NULL); - CSRMatrix* mult_T(CSRMatrix* A, int* C_map = NULL); - CSRMatrix* mult_T(COOMatrix* A, int* C_map = NULL); - - virtual void add_value(int row, int col, double value) = 0; - virtual void add_value(int row, int col, double* value) = 0; - - Matrix* add(CSRMatrix* A, bool remove_dup = true); - void add_append(CSRMatrix* A, CSRMatrix* C, bool remove_dup = true); - Matrix* subtract(CSRMatrix* A); - - void resize(int _n_rows, int _n_cols); - - virtual void resize_data(int size) = 0; - virtual void* get_data() = 0; - virtual int data_size() const = 0; - virtual void reserve_size(int size) = 0; - virtual double get_val(const int j, const int k) = 0; - - std::vector idx1; - std::vector idx2; - std::vector vals; - - int b_rows; - int b_cols; - int b_size; - - int n_rows; - int n_cols; - int nnz; - - bool sorted; - bool diag_first; - - }; - - -/************************************************************** - ***** COOMatrix Class (Inherits from Matrix Base Class) - ************************************************************** - ***** This class constructs a sparse matrix in COO format. - ***** - ***** Methods - ***** ------- - ***** format() - ***** Returns the format of the sparse matrix (COO) - ***** sort() - ***** Sorts the matrix by row, and by column within each row. 
- ***** add_value(int row, int col, double val) - ***** Adds val to position (row, col) - ***** rows() - ***** Returns std::vector& containing the rows corresponding - ***** to each nonzero - ***** cols() - ***** Returns std::vector& containing the cols corresponding - ***** to each nonzero - ***** data() - ***** Returns std::vector& containing the nonzero values - **************************************************************/ - class COOMatrix : public Matrix - { - - public: - - /************************************************************** - ***** COOMatrix Class Constructor - ************************************************************** - ***** Initializes an empty COOMatrix - ***** - ***** Parameters - ***** ------------- - ***** _nrows : int - ***** Number of rows in Matrix - ***** _ncols : int - ***** Number of columns in Matrix - ***** nnz_per_row : int - ***** Prediction of (approximately) number of nonzeros - ***** per row, used in reserving space - **************************************************************/ - COOMatrix(int _nrows, int _ncols, int nnz_per_row = 1) : Matrix(_nrows, _ncols) - { - int _nnz = nnz_per_row * _nrows; - if (_nnz) - { - idx1.reserve(_nnz); - idx2.reserve(_nnz); - vals.reserve(_nnz); - } - } - - COOMatrix(int _nrows, int _ncols, double* _data) : Matrix(_nrows, _ncols) - { - init_from_dense(_data); - } - - COOMatrix(int _nrows, int _ncols, std::vector& rows, std::vector& cols, - std::vector& data) : Matrix(_nrows, _ncols) - { - init_from_lists(rows, cols, data); - } - - COOMatrix() - { - } - - ~COOMatrix() - { - - } - - template - void init_from_dense(T* _data) - { - nnz = 0; - int nnz_dense = n_rows*n_cols; - - if (nnz_dense) - { - idx1.resize(nnz_dense); - idx2.resize(nnz_dense); - resize_data(nnz_dense); - } - - T* val_list = (T*) get_data(); - - for (int i = 0; i < n_rows; i++) - { - for (int j = 0; j < n_cols; j++) - { - int pos = i * n_cols + j; - if (abs_val(_data[pos]) > zero_tol) - { - idx1[nnz] = i; - idx2[nnz] = j; 
- val_list[nnz] = copy_val(_data[pos]); - nnz++; - } - } - } - } - - COOMatrix* transpose(); - - void print(); - - void sort(); - void move_diag(); - void remove_duplicates(); - - void spmv(const double* x, double* b) const; - void spmv_append(const double* x, double* b) const; - void spmv_append_T(const double* x, double* b) const; - void spmv_append_neg(const double* x, double* b) const; - void spmv_append_neg_T(const double* x, double* b) const; - void spmv_residual(const double* x, const double* b, double* r) const; - - CSRMatrix* spgemm(CSRMatrix* B, int* B_to_C = NULL); - CSRMatrix* spgemm_T(CSCMatrix* A, int* C_map = NULL); - - COOMatrix* to_COO(); - CSRMatrix* to_CSR(); - CSCMatrix* to_CSC(); - CSRMatrix* to_BSR(); - CSCMatrix* to_BSC(); - COOMatrix* to_BCOO(); - - void block_removal_col_check(bool* col_check); - - COOMatrix* copy(); - - void add_value(int row, int col, double value) - { - if (fabs(value) > zero_tol) - { - idx1.emplace_back(row); - idx2.emplace_back(col); - vals.emplace_back(value); - nnz++; - } - } - - void add_value(int row, int col, double* value) - { - idx1.emplace_back(row); - idx2.emplace_back(col); - vals.emplace_back(*value); - nnz++; - } - - format_t format() - { - return COO; - } - - void* get_data() - { - return vals.data(); - } - int data_size() const - { - return vals.size(); - } - - void resize_data(int size) - { - vals.resize(size); - } - - void reserve_size(int size) - { - idx1.reserve(size); - idx2.reserve(size); - vals.reserve(size); - } - - double get_val(const int j, const int k) - { - return vals[j]; - } -}; - - -/************************************************************** - ***** CSRMatrix Class (Inherits from Matrix Base Class) - ************************************************************** - ***** This class constructs a sparse matrix in CSR format. - ***** - ***** Methods - ***** ------- - ***** format() - ***** Returns the format of the sparse matrix (CSR) - ***** sort() - ***** Sorts the matrix. 
Already in row-wise order, but sorts - ***** the columns in each row. - ***** add_value(int row, int col, double val) - ***** TODO -- add this functionality - ***** indptr() - ***** Returns std::vector& row pointer. The ith element points to - ***** the index of indices() corresponding to the first column to lie on - ***** row i. - ***** indices() - ***** Returns std::vector& containing the cols corresponding - ***** to each nonzero - ***** data() - ***** Returns std::vector& containing the nonzero values - **************************************************************/ - class CSRMatrix : public Matrix - { - - public: - - /************************************************************** - ***** CSRMatrix Class Constructor - ************************************************************** - ***** Initializes an empty CSRMatrix - ***** - ***** Parameters - ***** ------------- - ***** _nrows : int - ***** Number of rows in Matrix - ***** _ncols : int - ***** Number of columns in Matrix - ***** nnz_per_row : int - ***** Prediction of (approximately) number of nonzeros - ***** per row, used in reserving space - **************************************************************/ - CSRMatrix(int _nrows, int _ncols, int _nnz = 0): Matrix(_nrows, _ncols) - { - idx1.resize(_nrows + 1); - if (_nnz) - { - idx2.reserve(_nnz); - vals.reserve(_nnz); - } - } - - CSRMatrix(int _nrows, int _ncols, double* _data) : Matrix(_nrows, _ncols) - { - init_from_dense(_data); - } - - CSRMatrix(int _nrows, int _ncols, std::vector& rowptr, - std::vector& cols, std::vector& data) : Matrix(_nrows, _ncols) - { - init_from_lists(rowptr, cols, data); - } - - CSRMatrix() - { - } - - ~CSRMatrix() - { - - } - - template - void init_from_dense(T* _data) - { - int nnz_dense = n_rows*n_cols; - idx1.resize(n_rows + 1); - if (nnz_dense) - { - idx2.resize(nnz_dense); - resize_data(nnz_dense); - } - - T* val_list = (T*) get_data(); - - idx1[0] = 0; - for (int i = 0; i < n_rows; i++) - { - for (int j = 0; j < n_cols; 
j++) - { - int pos = i * n_cols + j; - if (abs_val(_data[pos])) - { - idx2[nnz] = j; - val_list[nnz] = copy_val(_data[pos]); - nnz++; - } - } - idx1[i+1] = nnz; - } - } - - CSRMatrix* transpose(); - - void print(); - - void sort(); - void move_diag(); - void remove_duplicates(); - - void spmv(const double* x, double* b) const; - void spmv_append(const double* x, double* b) const; - void spmv_append_T(const double* x, double* b) const; - void spmv_append_neg(const double* x, double* b) const; - void spmv_append_neg_T(const double* x, double* b) const; - void spmv_residual(const double* x, const double* b, double* r) const; - - CSRMatrix* spgemm(CSRMatrix* B, int* B_to_C = NULL); - CSRMatrix* spgemm_T(CSCMatrix* A, int* C_map = NULL); - - CSRMatrix* add(CSRMatrix* A, bool remove_dup = true); - void add_append(CSRMatrix* A, CSRMatrix* C, bool remove_dup = true); - CSRMatrix* subtract(CSRMatrix* A); - - CSRMatrix* strength(strength_t strength_type = Classical, - double theta = 0.0, int num_variables = 1, int* variables = NULL); - CSRMatrix* aggregate(); - CSRMatrix* fit_candidates(data_t* B, data_t* R, int num_candidates, - double tol = 1e-10); - - COOMatrix* to_COO(); - CSRMatrix* to_CSR(); - CSCMatrix* to_CSC(); - CSRMatrix* to_BSR(); - CSCMatrix* to_BSC(); - COOMatrix* to_BCOO(); - - void block_removal_col_check(bool* col_check); - - CSRMatrix* copy(); - - format_t format() - { - return CSR; - } - - void add_value(int row, int col, double value) - { - if (fabs(value) > zero_tol) - { - idx2.emplace_back(col); - vals.emplace_back(value); - nnz++; - } - } - void add_value(int row, int col, double* value) - { - idx2.emplace_back(col); - vals.emplace_back(*value); - nnz++; - } - - void* get_data() - { - return vals.data(); - } - int data_size() const - { - return vals.size(); - } - void resize_data(int size) - { - vals.resize(size); - } - void reserve_size(int size) - { - idx2.reserve(size); - vals.reserve(size); - } - - double get_val(const int j, const int k) - { - 
return vals[j]; - } - -}; - -/************************************************************** - ***** CSCMatrix Class (Inherits from Matrix Base Class) - ************************************************************** - ***** This class constructs a sparse matrix in CSC format. - ***** - ***** Methods - ***** ------- - ***** format() - ***** Returns the format of the sparse matrix (CSC) - ***** sort() - ***** Sorts the matrix. Already in col-wise order, but sorts - ***** the rows in each column. - ***** add_value(int row, int col, double val) - ***** TODO -- add this functionality - ***** indptr() - ***** Returns std::vector& column pointer. The ith element points to - ***** the index of indices() corresponding to the first row to lie on - ***** column i. - ***** indices() - ***** Returns std::vector& containing the rows corresponding - ***** to each nonzero - ***** data() - ***** Returns std::vector& containing the nonzero values - **************************************************************/ - class CSCMatrix : public Matrix - { - - public: - - CSCMatrix(int _nrows, int _ncols, int _nnz = 0): Matrix(_nrows, _ncols) - { - idx1.resize(_ncols + 1); - if (_nnz) - { - idx2.reserve(_nnz); - vals.reserve(_nnz); - } - nnz = _nnz; - } - - CSCMatrix(int _nrows, int _ncols, double* _data) : Matrix(_nrows, _ncols) - { - init_from_dense(_data); - } - - CSCMatrix(int _nrows, int _ncols, std::vector& colptr, - std::vector& rows, std::vector& data) : Matrix(_nrows, _ncols) - { - init_from_lists(colptr, rows, data); - } - - CSCMatrix() - { - } - - ~CSCMatrix() - { - - } - - template - void init_from_dense(T* _data) - { - int nnz_dense = n_rows*n_cols; - - idx1.resize(n_cols + 1); - if (nnz_dense) - { - idx2.resize(nnz_dense); - resize_data(nnz_dense); - } - - T* val_list = (T*) get_data(); - - idx1[0] = 0; - for (int i = 0; i < n_cols; i++) - { - for (int j = 0; j < n_rows; j++) - { - int pos = i * n_cols + j; - if (abs_val(_data[pos]) > zero_tol) - { - idx2[nnz] = j; - 
val_list[nnz] = copy_val(_data[pos]); - nnz++; - } - } - idx1[i+1] = nnz; - } - - } - - CSCMatrix* transpose(); - void print(); - - void sort(); - void move_diag(); - void remove_duplicates(); - - void spmv(const double* x, double* b) const; - void spmv_append(const double* x, double* b) const; - void spmv_append_T(const double* x, double* b) const; - void spmv_append_neg(const double* x, double* b) const; - void spmv_append_neg_T(const double* x, double* b) const; - void spmv_residual(const double* x, const double* b, double* r) const; - - - CSRMatrix* spgemm(CSRMatrix* B, int* B_to_C = NULL); - CSRMatrix* spgemm_T(CSCMatrix* A, int* C_map = NULL); - - void jacobi(Vector& x, Vector& b, Vector& tmp, double omega = .667); - - COOMatrix* to_COO(); - CSRMatrix* to_CSR(); - CSCMatrix* to_CSC(); - CSRMatrix* to_BSR(); - CSCMatrix* to_BSC(); - COOMatrix* to_BCOO(); - - void block_removal_col_check(bool* col_check); - - CSCMatrix* copy(); - - format_t format() - { - return CSC; - } - - void add_value(int row, int col, double value) - { - if (fabs(value) > zero_tol) - { - idx2.emplace_back(row); - vals.emplace_back(value); - nnz++; - } - } - void add_value(int row, int col, double* value) - { - idx2.emplace_back(row); - vals.emplace_back(*value); - nnz++; - } - - void* get_data() - { - return vals.data(); - } - int data_size() const - { - return vals.size(); - } - void resize_data(int size) - { - vals.resize(size); - } - void reserve_size(int size) - { - idx2.reserve(size); - vals.reserve(size); - } - - double get_val(const int j, const int k) - { - return vals[j]; - } - - }; - - - - - -// Forward Declaration of Blocked Classes -class BCOOMatrix; -class BSRMatrix; -class BSCMatrix; - -class BSRMatrix : public CSRMatrix -{ - public: - BSRMatrix(int num_block_rows, int num_block_cols, int block_row_size, - int block_col_size, int _nnz = 1) - : CSRMatrix(num_block_rows, num_block_cols, 0) - { - b_rows = block_row_size; - b_cols = block_col_size; - b_size = b_rows * b_cols; - 
} - - BSRMatrix(int num_block_rows, int num_block_cols, - int block_row_size, int block_col_size, double** data) - : CSRMatrix(num_block_rows, num_block_cols, 0) - { - b_rows = block_row_size; - b_cols = block_col_size; - b_size = b_rows * b_cols; - - init_from_dense(data); - } - - - BSRMatrix(int num_block_rows, int num_block_cols, - int block_row_size, int block_col_size, std::vector& rowptr, - std::vector& cols, std::vector& data) - : CSRMatrix(num_block_rows, num_block_cols, 0) - { - b_rows = block_row_size; - b_cols = block_col_size; - b_size = b_rows * b_cols; - - init_from_lists(rowptr, cols, data); - } - - BSRMatrix(CSRMatrix* A, int block_row_size, int block_col_size) : CSRMatrix(A->n_rows / block_row_size, A->n_cols / block_col_size, 0) - { - b_rows = block_row_size; - b_cols = block_col_size; - b_size = b_rows * b_cols; - - // Convert CSR to BSR - std::vector idx(A->n_cols, -1); - for (int bsr_row = 0; bsr_row < n_rows; bsr_row++) - { - for (int block = 0; block < b_rows; block++) - { - int csr_row = bsr_row*b_rows+block; - - for (int j = A->idx1[csr_row]; j < A->idx1[csr_row+1]; j++) - { - int csr_col = A->idx2[j]; - int bsr_col = csr_col / b_rows; - if (idx[bsr_col] == -1) - { - idx[bsr_col] = idx2.size(); - idx2.push_back(bsr_col); - block_vals.push_back(new double[b_size]()); - } - int idx_row = csr_row % b_rows; - int idx_col = csr_col % b_cols; - block_vals[idx[bsr_col]][idx_row*b_rows + idx_col] = A->vals[j]; - } - } - idx1[bsr_row+1] = idx2.size(); - - // Reset IDX array for next BSR row - for (int j = idx1[bsr_row]; j < idx1[bsr_row+1]; j++) - idx[idx2[j]] = -1; - } - } - - - BSRMatrix() : CSRMatrix() - { - b_rows = 1; - b_cols = 1; - b_size = 1; - } - - ~BSRMatrix() - { - for (std::vector::iterator it = block_vals.begin(); - it != block_vals.end(); ++it) - delete[] *it; - } - - BSRMatrix* transpose(); - void sort(); - void remove_duplicates(); - void move_diag(); - - COOMatrix* to_COO(); - CSRMatrix* to_CSR(); - CSCMatrix* to_CSC(); - 
CSRMatrix* to_BSR(); - CSCMatrix* to_BSC(); - COOMatrix* to_BCOO(); - - void block_removal_col_check(bool* col_check); - - void print(); - BSRMatrix* copy(); - - BSRMatrix* spgemm(CSRMatrix* B, int* B_to_C = NULL); - BSRMatrix* spgemm_T(CSCMatrix* A, int* C_map = NULL); - - void spmv(const double* x, double* b) const; - void spmv_append(const double* x, double* b) const; - void spmv_append_T(const double* x, double* b) const; - void spmv_append_neg(const double* x, double* b) const; - void spmv_append_neg_T(const double* x, double* b) const; - void spmv_residual(const double* x, const double* b, double* r) const; - - format_t format() - { - return BSR; - } - - void add_value(int row, int col, double* value) - { - idx2.emplace_back(col); - block_vals.emplace_back(copy_val(value)); - nnz++; - } - - void* get_data() - { - return block_vals.data(); - } - int data_size() const - { - return block_vals.size(); - } - void resize_data(int size) - { - block_vals.resize(size); - } - void reserve_size(int size) - { - idx2.reserve(size); - block_vals.reserve(size); - } - - double get_val(const int j, const int k) - { - return block_vals[j][k]; - } - - std::vector block_vals; -}; - -class BCOOMatrix : public COOMatrix -{ - public: - BCOOMatrix(int num_block_rows, int num_block_cols, int block_row_size, - int block_col_size, int nnz_per_block_row = 1) - : COOMatrix(num_block_rows, num_block_cols, 0) - { - b_rows = block_row_size; - b_cols = block_col_size; - b_size = b_rows * b_cols; - } - - BCOOMatrix(int num_block_rows, int num_block_cols, - int block_row_size, int block_col_size, double** values) - : COOMatrix(num_block_rows, num_block_cols, 0) - { - b_rows = block_row_size; - b_cols = block_col_size; - b_size = b_rows * b_cols; - - init_from_dense(values); - } - - BCOOMatrix(int num_block_rows, int num_block_cols, - int block_row_size, int block_col_size, - std::vector& rows, std::vector& cols, - std::vector& data) - : COOMatrix(num_block_rows, num_block_cols, 0) - { - b_rows 
= block_row_size; - b_cols = block_col_size; - b_size = b_rows * b_cols; - - init_from_lists(rows, cols, data); - } - - BCOOMatrix() : COOMatrix() - { - b_rows = 1; - b_cols = 1; - b_size = 1; - } - - ~BCOOMatrix() - { - for (std::vector::iterator it = block_vals.begin(); - it != block_vals.end(); ++it) - delete[] *it; - } - - BCOOMatrix* transpose(); - void sort(); - void remove_duplicates(); - void move_diag(); - - void print(); - BCOOMatrix* copy(); - COOMatrix* to_COO(); - CSRMatrix* to_CSR(); - CSCMatrix* to_CSC(); - CSRMatrix* to_BSR(); - CSCMatrix* to_BSC(); - COOMatrix* to_BCOO(); - - void block_removal_col_check(bool* col_check); - - BSRMatrix* spgemm(CSRMatrix* B, int* B_to_C = NULL); - BSRMatrix* spgemm_T(CSCMatrix* A, int* C_map = NULL); - - void spmv(const double* x, double* b) const; - void spmv_append(const double* x, double* b) const; - void spmv_append_T(const double* x, double* b) const; - void spmv_append_neg(const double* x, double* b) const; - void spmv_append_neg_T(const double* x, double* b) const; - void spmv_residual(const double* x, const double* b, double* r) const; - - void add_value(int row, int col, double* values) - { - idx1.emplace_back(row); - idx2.emplace_back(col); - block_vals.emplace_back(copy_val(values)); - nnz++; - } - - format_t format() - { - return BCOO; - } - - void* get_data() - { - return block_vals.data(); - } - int data_size() const - { - return block_vals.size(); - } - void resize_data(int size) - { - block_vals.resize(size); - } - void reserve_size(int size) - { - idx1.reserve(size); - idx2.reserve(size); - block_vals.reserve(size); - } - - double get_val(const int j, const int k) - { - return block_vals[j][k]; - } - - std::vector block_vals; -}; - -// Blocks are still stored row-wise in BSC matrix... 
-class BSCMatrix : public CSCMatrix -{ - public: - BSCMatrix(int num_block_rows, int num_block_cols, int block_row_size, - int block_col_size, int _nnz = 1) - : CSCMatrix(num_block_rows, num_block_cols, 0) - { - b_rows = block_row_size; - b_cols = block_col_size; - b_size = b_rows * b_cols; - } - - BSCMatrix(int num_block_rows, int num_block_cols, - int block_row_size, int block_col_size, double** data) - : CSCMatrix(num_block_rows, num_block_cols, 0) - { - b_rows = block_row_size; - b_cols = block_col_size; - b_size = b_rows * b_cols; - - init_from_dense(data); - } - - - BSCMatrix(int num_block_rows, int num_block_cols, - int block_row_size, int block_col_size, std::vector& colptr, - std::vector& rows, std::vector& data) - : CSCMatrix(num_block_rows, num_block_cols, 0) - { - b_rows = block_row_size; - b_cols = block_col_size; - b_size = b_rows * b_cols; - - init_from_lists(colptr, rows, data); - } - - - BSCMatrix() : CSCMatrix() - { - b_rows = 1; - b_cols = 1; - b_size = 1; - } - - ~BSCMatrix() - { - for (std::vector::iterator it = block_vals.begin(); - it != block_vals.end(); ++it) - delete[] *it; - } - - BSCMatrix* transpose(); - void sort(); - void remove_duplicates(); - void move_diag(); - - COOMatrix* to_COO(); - CSRMatrix* to_CSR(); - CSCMatrix* to_CSC(); - CSRMatrix* to_BSR(); - CSCMatrix* to_BSC(); - COOMatrix* to_BCOO(); - - void block_removal_col_check(bool* col_check); - - void print(); - BSCMatrix* copy(); - - BSRMatrix* spgemm(CSRMatrix* B, int* B_to_C = NULL); - BSRMatrix* spgemm_T(CSCMatrix* A, int* C_map = NULL); - - void spmv(const double* x, double* b) const; - void spmv_append(const double* x, double* b) const; - void spmv_append_T(const double* x, double* b) const; - void spmv_append_neg(const double* x, double* b) const; - void spmv_append_neg_T(const double* x, double* b) const; - void spmv_residual(const double* x, const double* b, double* r) const; - - format_t format() - { - return BSC; - } - - void add_value(int row, int col, double* 
value) - { - idx2.emplace_back(row); - block_vals.emplace_back(copy_val(value)); - nnz++; - } - - void* get_data() - { - return block_vals.data(); - } - void resize_data(int size) - { - block_vals.resize(size); - } - int data_size() const - { - return block_vals.size(); - } - void reserve_size(int size) - { - idx2.reserve(size); - block_vals.reserve(size); - } - - double get_val(const int j, const int k) - { - return block_vals[j][k]; - } - - std::vector block_vals; -}; - - - -} - -#endif - diff --git a/raptor/core/mpi_types.cpp b/raptor/core/mpi_types.cpp deleted file mode 100644 index bd3c935c..00000000 --- a/raptor/core/mpi_types.cpp +++ /dev/null @@ -1,331 +0,0 @@ -bool profile = false; -double collective_t = 0.0; -double p2p_t = 0.0; -double* current_t; -double mat_t = 0.0; -double vec_t = 0.0; -double total_t = 0.0; -double new_comm_t = 0.0; - -#include -#include "mpi_types.hpp" - -void init_profile() -{ - profile = true; - reset_profile(); -} -void reset_profile() -{ - collective_t = 0.0; - p2p_t = 0.0; - mat_t = 0.0; - vec_t = 0.0; - new_comm_t = 0.0; - if (profile) total_t = -MPI_Wtime(); - else total_t = 0.0; -} -void finalize_profile() -{ - profile = false; - total_t += MPI_Wtime(); -} -void average_profile(int n_iter) -{ - total_t /= n_iter; - collective_t /= n_iter; - p2p_t /= n_iter; - vec_t /= n_iter; - mat_t /= n_iter; - new_comm_t /= n_iter; -} -void print_profile(const char* string) -{ - int rank; - double t0; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - - MPI_Allreduce(&total_t, &t0, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); - if (rank == 0) printf("%s Total Time: %e\n", string, t0); - if (fabs(t0 - total_t) > zero_tol) - reset_profile(); - MPI_Reduce(&collective_t, &t0, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); - if (rank == 0 && t0 > 0) printf("%s Collective Comm Time: %e\n", string, t0); - MPI_Reduce(&p2p_t, &t0, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); - if (rank == 0 && t0 > 0) printf("%s P2P Comm Time: %e\n", string, t0); - 
MPI_Reduce(&vec_t, &t0, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); - if (rank == 0 && t0 > 0) printf("%s Vec Comm Time: %e\n", string, t0); - MPI_Reduce(&mat_t, &t0, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); - if (rank == 0 && t0 > 0) printf("%s Mat Comm Time: %e\n", string, t0); -} - - -// Collective Methods -int RAPtor_MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, - RAPtor_MPI_Datatype datatype, RAPtor_MPI_Op op, RAPtor_MPI_Comm comm) -{ - if (profile) collective_t -= RAPtor_MPI_Wtime(); - int val = MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, comm); - if (profile) collective_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Reduce(const void *sendbuf, void *recvbuf, int count, - RAPtor_MPI_Datatype datatype, RAPtor_MPI_Op op, int root, RAPtor_MPI_Comm comm) -{ - if (profile) collective_t -= RAPtor_MPI_Wtime(); - int val = MPI_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm); - if (profile) collective_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Gather(const void *sendbuf, int sendcount, RAPtor_MPI_Datatype sendtype, - void *recvbuf, int recvcount, RAPtor_MPI_Datatype recvtype, int root, RAPtor_MPI_Comm comm) -{ - if (profile) collective_t -= RAPtor_MPI_Wtime(); - int val = MPI_Gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, - recvtype, root, comm); - if (profile) collective_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Allgather(const void* sendbuf, int sendcount, RAPtor_MPI_Datatype sendtype, - void *recvbuf, int recvcount, RAPtor_MPI_Datatype recvtype, RAPtor_MPI_Comm comm) -{ - if (profile) collective_t -= RAPtor_MPI_Wtime(); - int val = MPI_Allgather(sendbuf, sendcount, sendtype, recvbuf, - recvcount, recvtype, comm); - if (profile) collective_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Allgatherv(const void* sendbuf, int sendcount, RAPtor_MPI_Datatype sendtype, - void *recvbuf, const int *recvcounts, const int* displs, - RAPtor_MPI_Datatype recvtype, RAPtor_MPI_Comm 
comm) -{ - if (profile) collective_t -= RAPtor_MPI_Wtime(); - int val = MPI_Allgatherv(sendbuf, sendcount, sendtype, recvbuf, recvcounts, - displs, recvtype, comm); - if (profile) collective_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, - RAPtor_MPI_Datatype datatype, RAPtor_MPI_Op op, RAPtor_MPI_Comm comm, RAPtor_MPI_Request* request) -{ - if (profile) collective_t -= RAPtor_MPI_Wtime(); - int val = MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request); - if (profile) collective_t += RAPtor_MPI_Wtime(); - if (profile) current_t = &collective_t; - return val; -} -int RAPtor_MPI_Bcast(void *buffer, int count, RAPtor_MPI_Datatype datatype, - int root, RAPtor_MPI_Comm comm) -{ - if (profile) collective_t -= RAPtor_MPI_Wtime(); - int val = MPI_Bcast(buffer, count, datatype, root, comm); - if (profile) collective_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Ibarrier(RAPtor_MPI_Comm comm, RAPtor_MPI_Request *request) -{ - if (profile) collective_t -= RAPtor_MPI_Wtime(); - int val = MPI_Ibarrier(comm, request); - if (profile) collective_t += RAPtor_MPI_Wtime(); - if (profile) current_t = &collective_t; - return val; -} -int RAPtor_MPI_Barrier(RAPtor_MPI_Comm comm) -{ - if (profile) collective_t -= RAPtor_MPI_Wtime(); - int val = MPI_Barrier(comm); - if (profile) collective_t += RAPtor_MPI_Wtime(); - return val; -} - - - -// Point-to-Point Methods -int RAPtor_MPI_Send(const void *buf, int count, RAPtor_MPI_Datatype datatype, int dest, - int tag, RAPtor_MPI_Comm comm) -{ - if (profile) p2p_t -= RAPtor_MPI_Wtime(); - int val = MPI_Send(buf, count, datatype, dest, tag, comm); - if (profile) p2p_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Isend(const void *buf, int count, RAPtor_MPI_Datatype datatype, int dest, int tag, - RAPtor_MPI_Comm comm, RAPtor_MPI_Request * request) -{ - if (profile) p2p_t -= RAPtor_MPI_Wtime(); - int val = MPI_Isend(buf, count, datatype, 
dest, tag, comm, request); - if (profile) p2p_t += RAPtor_MPI_Wtime(); - if (profile) current_t = &p2p_t; - return val; -} -int RAPtor_MPI_Issend(const void *buf, int count, RAPtor_MPI_Datatype datatype, int dest, int tag, - RAPtor_MPI_Comm comm, RAPtor_MPI_Request * request) -{ - if (profile) p2p_t -= RAPtor_MPI_Wtime(); - int val = MPI_Issend(buf, count, datatype, dest, tag, comm, request); - if (profile) p2p_t += RAPtor_MPI_Wtime(); - if (profile) current_t = &p2p_t; - return val; -} -int RAPtor_MPI_Recv(void *buf, int count, RAPtor_MPI_Datatype datatype, int source, int tag, - RAPtor_MPI_Comm comm, RAPtor_MPI_Status * status) -{ - if (profile) p2p_t -= RAPtor_MPI_Wtime(); - int val = MPI_Recv(buf, count, datatype, source, tag, comm, status); - if (profile) p2p_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Irecv(void *buf, int count, RAPtor_MPI_Datatype datatype, int source, - int tag, RAPtor_MPI_Comm comm, RAPtor_MPI_Request * request) -{ - if (profile) p2p_t -= RAPtor_MPI_Wtime(); - int val = MPI_Irecv(buf, count, datatype, source, tag, comm, request); - if (profile) p2p_t += RAPtor_MPI_Wtime(); - if (profile) current_t = &p2p_t; - return val; -} -int RAPtor_MPI_Probe(int source, int tag, RAPtor_MPI_Comm comm, RAPtor_MPI_Status* status) -{ - if (profile) p2p_t -= RAPtor_MPI_Wtime(); - int val = MPI_Probe(source, tag, comm, status); - if (profile) p2p_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Iprobe(int source, int tag, RAPtor_MPI_Comm comm, - int *flag, RAPtor_MPI_Status *status) -{ - if (profile) p2p_t -= RAPtor_MPI_Wtime(); - int val = MPI_Iprobe(source, tag, comm, flag, status); - if (profile) p2p_t += RAPtor_MPI_Wtime(); - if (profile) current_t = &p2p_t; - return val; -} - - - -// Waiting for completion -int RAPtor_MPI_Wait(RAPtor_MPI_Request *request, RAPtor_MPI_Status *status) -{ - if (profile) *current_t -= RAPtor_MPI_Wtime(); - int val = MPI_Wait(request, status); - if (profile) *current_t += RAPtor_MPI_Wtime(); - return 
val; -} -int RAPtor_MPI_Waitall(int count, RAPtor_MPI_Request array_of_requests[], RAPtor_MPI_Status array_of_statuses[]) -{ - if (profile) *current_t -= RAPtor_MPI_Wtime(); - int val = MPI_Waitall(count, array_of_requests, array_of_statuses); - if (profile) *current_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Test(MPI_Request *request, int *flag, MPI_Status *status) -{ - if (profile) *current_t -= RAPtor_MPI_Wtime(); - int val = MPI_Test(request, flag, status); - if (profile) *current_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Testall(int count, MPI_Request array_of_requests[], - int* flag, MPI_Status array_of_statuses[]) -{ - if (profile) *current_t -= RAPtor_MPI_Wtime(); - int val = MPI_Testall(count, array_of_requests, flag, array_of_statuses); - if (profile) *current_t += RAPtor_MPI_Wtime(); - return val; -} - - -// Packing/Unpacking Data -int RAPtor_MPI_Pack(const void *inbuf, int incount, - RAPtor_MPI_Datatype datatype, void *outbuf, int outside, int *position, - RAPtor_MPI_Comm comm) -{ - return MPI_Pack(inbuf, incount, datatype, outbuf, outside, position, comm); -} -int RAPtor_MPI_Unpack(const void *inbuf, int insize, int *position, - void *outbuf, int outcount, RAPtor_MPI_Datatype datatype, RAPtor_MPI_Comm comm) -{ - return MPI_Unpack(inbuf, insize, position, outbuf, outcount, datatype, comm); -} -int RAPtor_MPI_Pack_size(int incount, RAPtor_MPI_Datatype datatype, - RAPtor_MPI_Comm comm, int *size) -{ - return MPI_Pack_size(incount, datatype, comm, size); -} - - -// Other utilities (no communication) -double RAPtor_MPI_Wtime() -{ - return MPI_Wtime(); -} -int RAPtor_MPI_Get_count(const RAPtor_MPI_Status *status, - RAPtor_MPI_Datatype datatype, int *count) -{ - return MPI_Get_count(status, datatype, count); -} -int RAPtor_MPI_Comm_rank(RAPtor_MPI_Comm comm, int* rank) -{ - return MPI_Comm_rank(comm, rank); -} -int RAPtor_MPI_Comm_size(RAPtor_MPI_Comm comm, int* size) -{ - return MPI_Comm_size(comm, size); -} - - - -// Creating 
New Communicator -int RAPtor_MPI_Comm_split(RAPtor_MPI_Comm comm, int color, int key, - RAPtor_MPI_Comm* new_comm) -{ - if (profile) new_comm_t -= RAPtor_MPI_Wtime(); - int val = MPI_Comm_split(comm, color, key, new_comm); - if (profile) new_comm_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Comm_group(RAPtor_MPI_Comm comm, RAPtor_MPI_Group *group) -{ - if (profile) new_comm_t -= RAPtor_MPI_Wtime(); - int val = MPI_Comm_group(comm, group); - if (profile) new_comm_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Comm_create_group(RAPtor_MPI_Comm comm, RAPtor_MPI_Group group, - int tag, RAPtor_MPI_Comm* newcomm) -{ - if (profile) new_comm_t -= RAPtor_MPI_Wtime(); - int val = MPI_Comm_create_group(comm, group, tag, newcomm); - if (profile) new_comm_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Group_incl(RAPtor_MPI_Group group, int n, const int ranks[], - RAPtor_MPI_Group *newgroup) -{ - if (profile) new_comm_t -= RAPtor_MPI_Wtime(); - int val = MPI_Group_incl(group, n, ranks, newgroup); - if (profile) new_comm_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Comm_free(RAPtor_MPI_Comm *comm) -{ - if (profile) new_comm_t -= RAPtor_MPI_Wtime(); - int val = MPI_Comm_free(comm); - if (profile) new_comm_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Group_free(RAPtor_MPI_Group* group) -{ - if (profile) new_comm_t -= RAPtor_MPI_Wtime(); - int val = MPI_Group_free(group); - if (profile) new_comm_t += RAPtor_MPI_Wtime(); - return val; -} -int RAPtor_MPI_Comm_dup(MPI_Comm comm, MPI_Comm* new_comm) -{ - if (profile) new_comm_t -= RAPtor_MPI_Wtime(); - int val = MPI_Comm_dup(comm, new_comm); - if (profile) new_comm_t += RAPtor_MPI_Wtime(); - return val; -} - diff --git a/raptor/core/mpi_types.hpp b/raptor/core/mpi_types.hpp deleted file mode 100644 index 8fbcdad1..00000000 --- a/raptor/core/mpi_types.hpp +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, 
http://opensource.org/licenses/BSD-2-Clause -#ifndef RAPTOR_CORE_MPI_TYPES_HPP_ -#define RAPTOR_CORE_MPI_TYPES_HPP_ - -#include "types.hpp" -#include - -// Global Timing Variables -extern bool profile; -extern double collective_t; -extern double p2p_t; -extern double* current_t; -extern double mat_t; -extern double vec_t; -extern double total_t; - -extern void init_profile(); -extern void reset_profile(); -extern void finalize_profile(); -extern void print_profile(const char* string); -extern void average_profile(int n_iter); - -#define RAPtor_MPI_COMM_WORLD MPI_COMM_WORLD -#define RAPtor_MPI_COMM_NULL MPI_COMM_NULL - -#define RAPtor_MPI_Comm MPI_Comm -#define RAPtor_MPI_Group MPI_Group -#define RAPtor_MPI_Datatype MPI_Datatype -#define RAPtor_MPI_Request MPI_Request -#define RAPtor_MPI_Status MPI_Status -#define RAPtor_MPI_Op MPI_Op - -#define RAPtor_MPI_INT MPI_INT -#define RAPtor_MPI_DOUBLE MPI_DOUBLE -#define RAPtor_MPI_DOUBLE_INT MPI_DOUBLE_INT -#define RAPtor_MPI_LONG MPI_LONG -#define RAPtor_MPI_PACKED MPI_PACKED - -#define RAPtor_MPI_STATUS_IGNORE MPI_STATUS_IGNORE -#define RAPtor_MPI_STATUSES_IGNORE MPI_STATUSES_IGNORE - -#define RAPtor_MPI_SOURCE MPI_SOURCE -#define RAPtor_MPI_ANY_SOURCE MPI_ANY_SOURCE - -#define RAPtor_MPI_IN_PLACE MPI_IN_PLACE -#define RAPtor_MPI_SUM MPI_SUM -#define RAPtor_MPI_MAX MPI_MAX -#define RAPtor_MPI_BOR MPI_BOR - - -// MPI Information -extern int RAPtor_MPI_Comm_rank(RAPtor_MPI_Comm comm, int *rank); -extern int RAPtor_MPI_Comm_size(RAPtor_MPI_Comm comm, int *size); - -// Collective Operations -extern int RAPtor_MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, - RAPtor_MPI_Datatype datatype, RAPtor_MPI_Op op, RAPtor_MPI_Comm comm); -extern int RAPtor_MPI_Reduce(const void *sendbuf, void *recvbuf, int count, - RAPtor_MPI_Datatype datatype, RAPtor_MPI_Op op, int root, - RAPtor_MPI_Comm comm); -extern int RAPtor_MPI_Gather(const void *sendbuf, int sendcount, - RAPtor_MPI_Datatype sendtype, void *recvbuf, int 
recvcount, - RAPtor_MPI_Datatype recvtype, int root, RAPtor_MPI_Comm comm); -extern int RAPtor_MPI_Allgather(const void* sendbuf, int sendcount, - RAPtor_MPI_Datatype sendtype, void *recvbuf, int recvcount, - RAPtor_MPI_Datatype recvtype, RAPtor_MPI_Comm comm); -extern int RAPtor_MPI_Allgatherv(const void* sendbuf, int sendcount, - RAPtor_MPI_Datatype sendtype, void *recvbuf, const int *recvcounts, - const int* displs, RAPtor_MPI_Datatype recvtype, RAPtor_MPI_Comm comm); -extern int RAPtor_MPI_Iallreduce(const void *sendbuf, void *recvbuf, int count, - RAPtor_MPI_Datatype datatype, RAPtor_MPI_Op op, RAPtor_MPI_Comm comm, - RAPtor_MPI_Request* request); -extern int RAPtor_MPI_Ibarrier(RAPtor_MPI_Comm comm, - RAPtor_MPI_Request *request); -extern int RAPtor_MPI_Barrier(RAPtor_MPI_Comm comm); -extern int RAPtor_MPI_Bcast(void *buffer, int count, RAPtor_MPI_Datatype datatype, - int root, RAPtor_MPI_Comm comm); - -// Point-to-Point Operations -extern int RAPtor_MPI_Send(const void *buf, int count, - RAPtor_MPI_Datatype datatype, int dest, int tag, RAPtor_MPI_Comm comm); -extern int RAPtor_MPI_Isend(const void *buf, int count, - RAPtor_MPI_Datatype datatype, int dest, int tag, RAPtor_MPI_Comm comm, - RAPtor_MPI_Request * request); -extern int RAPtor_MPI_Issend(const void *buf, int count, - RAPtor_MPI_Datatype datatype, int dest, int tag, RAPtor_MPI_Comm comm, - RAPtor_MPI_Request * request); -extern int RAPtor_MPI_Recv(void *buf, int count, RAPtor_MPI_Datatype datatype, - int source, int tag, RAPtor_MPI_Comm comm, RAPtor_MPI_Status * status); -extern int RAPtor_MPI_Irecv(void *buf, int count, RAPtor_MPI_Datatype datatype, - int source, int tag, RAPtor_MPI_Comm comm, RAPtor_MPI_Request * request); - -// Waiting for data -extern int RAPtor_MPI_Wait(RAPtor_MPI_Request *request, - RAPtor_MPI_Status *status); -extern int RAPtor_MPI_Waitall(int count, RAPtor_MPI_Request array_of_requests[], - RAPtor_MPI_Status array_of_statuses[]); -extern int RAPtor_MPI_Probe(int source, int 
tag, RAPtor_MPI_Comm comm, - RAPtor_MPI_Status* status); -extern int RAPtor_MPI_Iprobe(int source, int tag, RAPtor_MPI_Comm comm, - int *flag, RAPtor_MPI_Status *status); -extern int RAPtor_MPI_Test(MPI_Request *request, int *flag, MPI_Status *status); -extern int RAPtor_MPI_Testall(int count, MPI_Request array_of_requests[], - int* flag, MPI_Status array_of_statuses[]); - -// Packing Data -extern int RAPtor_MPI_Pack(const void *inbuf, int incount, - RAPtor_MPI_Datatype datatype, void *outbuf, int outside, int *position, - RAPtor_MPI_Comm comm); -extern int RAPtor_MPI_Unpack(const void *inbuf, int insize, int *position, - void *outbuf, int outcount, RAPtor_MPI_Datatype datatype, RAPtor_MPI_Comm comm); -extern int RAPtor_MPI_Get_count(const RAPtor_MPI_Status *status, - RAPtor_MPI_Datatype datatype, int *count); -extern int RAPtor_MPI_Pack_size(int incount, RAPtor_MPI_Datatype datatype, - RAPtor_MPI_Comm comm, int *size); - -// Timing Data -extern double RAPtor_MPI_Wtime(); - -// Creating Communicators -extern int RAPtor_MPI_Comm_free(RAPtor_MPI_Comm *comm); -extern int RAPtor_MPI_Comm_split(RAPtor_MPI_Comm comm, int color, int key, - RAPtor_MPI_Comm* new_comm); -extern int RAPtor_MPI_Comm_group(RAPtor_MPI_Comm comm, RAPtor_MPI_Group *group); -extern int RAPtor_MPI_Comm_create_group(RAPtor_MPI_Comm comm, RAPtor_MPI_Group group, - int tag, RAPtor_MPI_Comm* newcomm); -extern int RAPtor_MPI_Group_incl(RAPtor_MPI_Group group, int n, const int ranks[], - RAPtor_MPI_Group *newgroup); -extern int RAPtor_MPI_Group_free(RAPtor_MPI_Group* group); -extern int RAPtor_MPI_Comm_dup(MPI_Comm comm, MPI_Comm* new_comm); - -#endif diff --git a/raptor/core/par_matrix.cpp b/raptor/core/par_matrix.cpp deleted file mode 100644 index edb1e611..00000000 --- a/raptor/core/par_matrix.cpp +++ /dev/null @@ -1,1116 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#include "par_matrix.hpp" - -using namespace 
raptor; - -// Declare private methods -void bsr_to_csr_copy_helper(ParBSRMatrix* A, ParCSRMatrix* B); - -/************************************************************** -***** ParMatrix Add Value -************************************************************** -***** Adds a value to the local portion of the parallel matrix, -***** determining whether it should be added to diagonal or -***** off-diagonal block. -***** -***** Parameters -***** ------------- -***** row : index_t -***** Local row of value -***** global_col : index_t -***** Global column of value -***** value : data_t -***** Value to be added to parallel matrix -**************************************************************/ -void ParMatrix::add_value( - int row, - index_t global_col, - data_t value) -{ - if (global_col >= partition->first_local_col - && global_col <= partition->last_local_col) - { - on_proc->add_value(row, global_col - partition->first_local_col, value); - } - else - { - off_proc->add_value(row, global_col, value); - } -} - -/************************************************************** -***** ParMatrix Add Global Value -************************************************************** -***** Adds a value to the local portion of the parallel matrix, -***** determining whether it should be added to diagonal or -***** off-diagonal block. 
-***** -***** Parameters -***** ------------- -***** global_row : index_t -***** Global row of value -***** global_col : index_t -***** Global column of value -***** value : data_t -***** Value to be added to parallel matrix -**************************************************************/ -void ParMatrix::add_global_value( - index_t global_row, - index_t global_col, - data_t value) -{ - add_value(global_row - partition->first_local_row, global_col, value); -} - -/************************************************************** -***** ParMatrix Finalize -************************************************************** -***** Finalizes the diagonal and off-diagonal matrices. Sorts -***** the local_to_global indices, and creates the parallel -***** communicator -***** -***** Parameters -***** ------------- -***** create_comm : bool (optional) -***** Boolean for whether parallel communicator should be -***** created (default is true) -**************************************************************/ -void ParMatrix::condense_off_proc() -{ - if (off_proc->nnz == 0) - { - return; - } - - int prev_col = -1; - - std::map orig_to_new; - - std::copy(off_proc->idx2.begin(), off_proc->idx2.end(), - std::back_inserter(off_proc_column_map)); - std::sort(off_proc_column_map.begin(), off_proc_column_map.end()); - - off_proc_num_cols = 0; - for (std::vector::iterator it = off_proc_column_map.begin(); - it != off_proc_column_map.end(); ++it) - { - if (*it != prev_col) - { - orig_to_new[*it] = off_proc_num_cols; - off_proc_column_map[off_proc_num_cols++] = *it; - prev_col = *it; - } - } - off_proc_column_map.resize(off_proc_num_cols); - - for (std::vector::iterator it = off_proc->idx2.begin(); - it != off_proc->idx2.end(); ++it) - { - *it = orig_to_new[*it]; - } -} - -void ParMatrix::finalize(bool create_comm) -{ - on_proc->sort(); - on_proc->remove_duplicates(); - off_proc->sort(); - off_proc->remove_duplicates(); - - int rank, num_procs; - RAPtor_MPI_Comm_size(RAPtor_MPI_COMM_WORLD, 
&num_procs); - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - - // Assume nonzeros in each on_proc column - if (on_proc_num_cols > (int)on_proc_column_map.size()) - { - on_proc_column_map.resize(on_proc_num_cols); - for (int i = 0; i < on_proc_num_cols; i++) - { - on_proc_column_map[i] = i + partition->first_local_col; - } - } - - if (local_num_rows > (int)local_row_map.size()) - { - local_row_map.resize(local_num_rows); - for (int i = 0; i < local_num_rows; i++) - { - local_row_map[i] = i + partition->first_local_row; - } - } - - // Condense columns in off_proc, storing global - // columns as 0-num_cols, and store mapping - if (off_proc->nnz) - { - condense_off_proc(); - } - else - { - off_proc_num_cols = 0; - } - off_proc->resize(local_num_rows, off_proc_num_cols); - local_nnz = on_proc->nnz + off_proc->nnz; - - if (create_comm){ - comm = new ParComm(partition, off_proc_column_map); - } - else - comm = new ParComm(partition); -} - -int* ParMatrix::map_partition_to_local() -{ - int* on_proc_partition_to_col = new int[partition->local_num_cols+1]; - for (int i = 0; i < partition->local_num_cols+1; i++) on_proc_partition_to_col[i] = -1; - for (int i = 0; i < on_proc_num_cols; i++) - { - on_proc_partition_to_col[on_proc_column_map[i] - partition->first_local_col] = i; - } - - return on_proc_partition_to_col; -} - - -/************************************************************** -***** ParBSRMatrix to ParCSRMatrix Convert -**************************************************************/ -void bsr_to_csr_copy_helper(ParBSRMatrix* A, ParCSRMatrix* B) -{ - if (B->on_proc) - { - delete B->on_proc; - } - if (B->off_proc) - { - delete B->off_proc; - } - - // Convert on and off proc to CSR - B->on_proc = A->on_proc->to_CSR(); - B->off_proc = A->off_proc->to_CSR(); - - B->local_nnz = B->on_proc->nnz + B->off_proc->nnz; - B->global_num_rows = A->global_num_rows * A->on_proc->b_rows; - B->global_num_cols = A->global_num_cols * A->on_proc->b_cols; - - B->on_proc_num_cols 
= B->on_proc->n_cols; - B->off_proc_num_cols = B->off_proc->n_cols; - - // Updated partition - B->partition = new Partition(B->global_num_rows, B->global_num_cols, - B->on_proc->n_rows, B->on_proc->n_cols, - A->partition->first_local_row * A->on_proc->b_rows, - A->partition->first_local_col * A->on_proc->b_cols); - B->local_num_rows = B->partition->local_num_rows; - - // Updated column and row maps - - B->finalize(false); - - // Determine which cols of blocks are non-zero - bool* off_proc_nz_cols = new bool[A->off_proc_num_cols * A->off_proc->b_cols]; - A->off_proc->block_removal_col_check(off_proc_nz_cols); - - // Update off_proc_column_map - int first_col; - int off_proc_map_indx = 0; - for (int i = 0; i < A->off_proc_num_cols; i++) - { - first_col = A->off_proc_column_map[i] * A->off_proc->b_cols; - for (int j = 0; j < A->off_proc->b_cols; j++) - { - if (off_proc_nz_cols[i*A->off_proc->b_cols + j]) - { - B->off_proc_column_map[off_proc_map_indx] = first_col + j; - off_proc_map_indx++; - } - } - } - - // Updated how communicators are created - if (A->comm) - { - B->comm = new ParComm(B->partition, B->off_proc_column_map, B->on_proc_column_map); - } - else - { - B->comm = NULL; - } - - if (A->tap_comm) - { - B->tap_comm = new TAPComm(B->partition, B->off_proc_column_map, B->on_proc_column_map); - } - else - { - B->tap_comm = NULL; - } - - if (A->tap_mat_comm) - { - B->tap_mat_comm = new TAPComm(B->partition, B->off_proc_column_map, B->on_proc_column_map); - } - else - { - B->tap_mat_comm = NULL; - } - - delete[] off_proc_nz_cols; -} - - - -/************************************************************** -***** ParMatrix Convert -************************************************************** -***** Convert from one type of parmatrix to another -***** No copies if parmatrix type remains the same -***** If blocked parmatrix, converts to block matrix -**************************************************************/ -ParCOOMatrix* ParCOOMatrix::to_ParCOO() -{ - return 
this; -} -ParCOOMatrix* ParCOOMatrix::to_ParBCOO() -{ - return this->to_ParCOO(); -} -ParCOOMatrix* ParBCOOMatrix::to_ParCOO() -{ - return this->to_ParBCOO(); -} -ParCOOMatrix* ParBCOOMatrix::to_ParBCOO() -{ - return this; -} -ParCSRMatrix* ParCOOMatrix::to_ParCSR() -{ - ParCSRMatrix* A = new ParCSRMatrix(); - A->copy_helper(this); - return A; -} -ParCSRMatrix* ParCOOMatrix::to_ParBSR() -{ - return this->to_ParCSR(); -} -ParCSRMatrix* ParBCOOMatrix::to_ParCSR() -{ - return this->to_ParBSR(); -} -ParCSRMatrix* ParBCOOMatrix::to_ParBSR() -{ - ParBSRMatrix* A = new ParBSRMatrix(); - A->copy_helper(this); - return A; -} -ParCSCMatrix* ParCOOMatrix::to_ParCSC() -{ - ParCSCMatrix* A = new ParCSCMatrix(); - A->copy_helper(this); - return A; -} -ParCSCMatrix* ParCOOMatrix::to_ParBSC() -{ - return this->to_ParCSC(); -} -ParCSCMatrix* ParBCOOMatrix::to_ParCSC() -{ - return this->to_ParBSC(); -} -ParCSCMatrix* ParBCOOMatrix::to_ParBSC() -{ - ParBSCMatrix* A = new ParBSCMatrix(); - A->copy_helper(this); - return A; -} - -ParCOOMatrix* ParCSRMatrix::to_ParCOO() -{ - ParCOOMatrix* A = new ParCOOMatrix(); - A->copy_helper(this); - return A; -} -ParCOOMatrix* ParCSRMatrix::to_ParBCOO() -{ - return this->to_ParCOO(); -} -ParCOOMatrix* ParBSRMatrix::to_ParCOO() -{ - return this->to_ParBCOO(); -} -ParCOOMatrix* ParBSRMatrix::to_ParBCOO() -{ - ParBCOOMatrix* A = new ParBCOOMatrix(); - A->copy_helper(this); - return A; -} -ParCSRMatrix* ParCSRMatrix::to_ParCSR() -{ - return this; -} -ParCSRMatrix* ParCSRMatrix::to_ParBSR() -{ - return this->to_ParCSR(); -} -ParCSRMatrix* ParBSRMatrix::to_ParCSR() -{ - ParCSRMatrix* A = new ParCSRMatrix(); - bsr_to_csr_copy_helper(this, A); - return A; -} -ParCSRMatrix* ParBSRMatrix::to_ParBSR() -{ - return this; -} -ParCSCMatrix* ParCSRMatrix::to_ParCSC() -{ - ParCSCMatrix* A = new ParCSCMatrix(); - A->copy_helper(this); - return A; -} -ParCSCMatrix* ParCSRMatrix::to_ParBSC() -{ - return this->to_ParCSC(); -} -ParCSCMatrix* ParBSRMatrix::to_ParCSC() -{ 
- return this->to_ParBSC(); -} -ParCSCMatrix* ParBSRMatrix::to_ParBSC() -{ - ParBSCMatrix* A = new ParBSCMatrix(); - A->copy_helper(this); - return A; -} - -ParCOOMatrix* ParCSCMatrix::to_ParCOO() -{ - ParCOOMatrix* A = new ParCOOMatrix(); - A->copy_helper(this); - return A; -} -ParCOOMatrix* ParCSCMatrix::to_ParBCOO() -{ - return this->to_ParCOO(); -} -ParCOOMatrix* ParBSCMatrix::to_ParCOO() -{ - return this->to_ParBCOO(); -} -ParCOOMatrix* ParBSCMatrix::to_ParBCOO() -{ - ParBCOOMatrix* A = new ParBCOOMatrix(); - A->copy_helper(this); - return A; -} -ParCSRMatrix* ParCSCMatrix::to_ParCSR() -{ - ParCSRMatrix* A = new ParCSRMatrix(); - A->copy_helper(this); - return A; -} -ParCSRMatrix* ParCSCMatrix::to_ParBSR() -{ - return this->to_ParCSR(); -} -ParCSRMatrix* ParBSCMatrix::to_ParCSR() -{ - return this->to_ParBSR(); -} -ParCSRMatrix* ParBSCMatrix::to_ParBSR() -{ - ParBSRMatrix* A = new ParBSRMatrix(); - A->copy_helper(this); - return A; -} -ParCSCMatrix* ParCSCMatrix::to_ParCSC() -{ - return this; -} -ParCSCMatrix* ParCSCMatrix::to_ParBSC() -{ - return this->to_ParCSC(); -} -ParCSCMatrix* ParBSCMatrix::to_ParCSC() -{ - return this->to_ParBSC(); -} -ParCSCMatrix* ParBSCMatrix::to_ParBSC() -{ - return this; -} - - -void ParCSRMatrix::copy_structure(ParBSRMatrix* A) -{ - on_proc->idx1.clear(); - on_proc->idx2.clear(); - off_proc->idx1.clear(); - off_proc->idx2.clear(); - - std::copy(A->on_proc->idx1.begin(), A->on_proc->idx1.end(), - std::back_inserter(on_proc->idx1)); - std::copy(A->on_proc->idx2.begin(), A->on_proc->idx2.end(), - std::back_inserter(on_proc->idx2)); - - std::copy(A->off_proc->idx1.begin(), A->off_proc->idx1.end(), - std::back_inserter(off_proc->idx1)); - std::copy(A->off_proc->idx2.begin(), A->off_proc->idx2.end(), - std::back_inserter(off_proc->idx2)); - - on_proc->n_rows = A->on_proc->n_rows; - on_proc->n_cols = A->on_proc->n_cols; - on_proc->nnz = A->on_proc->nnz; - - off_proc->n_rows = A->off_proc->n_rows; - off_proc->n_cols = A->off_proc->n_cols; 
- off_proc->nnz = A->off_proc->nnz; - - ParMatrix::copy_helper(A); -} - - -void ParMatrix::default_copy_helper(ParMatrix* A) -{ - partition = A->partition; - partition->num_shared++; - - local_nnz = A->local_nnz; - local_num_rows = A->local_num_rows; - global_num_rows = A->global_num_rows; - global_num_cols = A->global_num_cols; - - std::copy(A->off_proc_column_map.begin(), A->off_proc_column_map.end(), - std::back_inserter(off_proc_column_map)); - std::copy(A->on_proc_column_map.begin(), A->on_proc_column_map.end(), - std::back_inserter(on_proc_column_map)); - std::copy(A->local_row_map.begin(), A->local_row_map.end(), - std::back_inserter(local_row_map)); - - off_proc_num_cols = off_proc_column_map.size(); - on_proc_num_cols = on_proc_column_map.size(); - - if (A->comm) - { - comm = A->comm; - comm->num_shared++; - } - else - { - comm = NULL; - } - - if (A->tap_comm) - { - tap_comm = A->tap_comm; - tap_comm->num_shared++; - } - else - { - tap_comm = NULL; - } - - if (A->tap_mat_comm) - { - tap_mat_comm = A->tap_mat_comm; - tap_mat_comm->num_shared++; - } - else - { - tap_mat_comm = NULL; - } -} - -void ParMatrix::copy_helper(ParCOOMatrix* A) -{ - default_copy_helper(A); -} -void ParMatrix::copy_helper(ParCSRMatrix* A) -{ - default_copy_helper(A); -} -void ParMatrix::copy_helper(ParCSCMatrix* A) -{ - default_copy_helper(A); -} - - -void ParCOOMatrix::copy_helper(ParCOOMatrix* A) -{ - if (on_proc) - { - delete on_proc; - } - if (off_proc) - { - delete off_proc; - } - - on_proc = A->on_proc->copy(); - off_proc = A->off_proc->copy(); - - ParMatrix::copy_helper(A); -} - -void ParCOOMatrix::copy_helper(ParCSRMatrix* A) -{ - if (on_proc) - { - delete on_proc; - } - if (off_proc) - { - delete off_proc; - } - - on_proc = A->on_proc->to_COO(); - off_proc = A->off_proc->to_COO(); - - ParMatrix::copy_helper(A); -} - -void ParCOOMatrix::copy_helper(ParCSCMatrix* A) -{ - if (on_proc) - { - delete on_proc; - } - if (off_proc) - { - delete off_proc; - } - - on_proc = 
A->on_proc->to_COO(); - off_proc = A->off_proc->to_COO(); - - ParMatrix::copy_helper(A); -} - -void ParCSRMatrix::copy_helper(ParCSRMatrix* A) -{ - if (on_proc) - { - delete on_proc; - } - if (off_proc) - { - delete off_proc; - } - - on_proc = A->on_proc->copy(); - off_proc = A->off_proc->copy(); - - ParMatrix::copy_helper(A); -} - -void ParCSRMatrix::copy_helper(ParCSCMatrix* A) -{ - if (on_proc) - { - delete on_proc; - } - if (off_proc) - { - delete off_proc; - } - - on_proc = A->on_proc->to_CSR(); - off_proc = A->off_proc->to_CSR(); - - ParMatrix::copy_helper(A); -} - -void ParCSRMatrix::copy_helper(ParCOOMatrix* A) -{ - if (on_proc) - { - delete on_proc; - } - if (off_proc) - { - delete off_proc; - } - - on_proc = A->on_proc->to_CSR(); - off_proc = A->off_proc->to_CSR(); - - ParMatrix::copy_helper(A); -} - -void ParCSCMatrix::copy_helper(ParCSRMatrix* A) -{ - if (on_proc) - { - delete on_proc; - } - if (off_proc) - { - delete off_proc; - } - - on_proc = A->on_proc->to_CSC(); - off_proc = A->off_proc->to_CSC(); - - ParMatrix::copy_helper(A); -} - -void ParCSCMatrix::copy_helper(ParCSCMatrix* A) -{ - if (on_proc) - { - delete on_proc; - } - if (off_proc) - { - delete off_proc; - } - - on_proc = A->on_proc->copy(); - off_proc = A->off_proc->copy(); - - ParMatrix::copy_helper(A); -} - -void ParCSCMatrix::copy_helper(ParCOOMatrix* A) -{ - if (on_proc) - { - delete on_proc; - } - if (off_proc) - { - delete off_proc; - } - - on_proc = A->on_proc->to_CSC(); - off_proc = A->off_proc->to_CSC(); - - ParMatrix::copy_helper(A); -} - -// Main transpose -ParCSRMatrix* ParCSRMatrix::transpose() -{ - int start, end; - int proc; - int col, col_start, col_end; - int ctr, prev_ctr, size, bytes; - int col_count, count; - int col_size; - int idx, row; - RAPtor_MPI_Status recv_status; - - Partition* part_T; - Matrix* on_proc_T; - Matrix* off_proc_T; - CSCMatrix* send_mat; - CSCMatrix* recv_mat; - ParCSRMatrix* T = NULL; - - std::vector send_buffer; - std::vector recv_buffer; - - // 
Transpose partition - part_T = partition->transpose(); - - // Transpose local (on_proc) matrix - on_proc_T = on_proc->transpose(); - - // Allocate vectors for sending off_proc matrix - send_mat = off_proc->to_CSC(); - recv_mat = new CSCMatrix(local_num_rows, comm->send_data->size_msgs); - - int int_size, dbl_size; - MPI_Pack_size(1, RAPtor_MPI_INT, comm->mpi_comm, &int_size); - MPI_Pack_size(1, RAPtor_MPI_DOUBLE, comm->mpi_comm, &dbl_size); - - bytes = 0; - for (int i = 0; i < comm->recv_data->num_msgs; i++) - { - start = comm->recv_data->indptr[i]; - end = comm->recv_data->indptr[i+1]; - for (col = start; col < end; col++) - { - col_start = send_mat->idx1[col]; - col_end = send_mat->idx1[col+1]; - col_size = col_end - col_start; - bytes += col_size * (int_size + dbl_size) + int_size; - } - } - send_buffer.resize(bytes); - std::vector send_ptr(comm->recv_data->num_msgs+1); - - // Add off_proc cols of matrix to send buffer - ctr = 0; - prev_ctr = 0; - for (int i = 0; i < comm->recv_data->num_msgs; i++) - { - proc = comm->recv_data->procs[i]; - start = comm->recv_data->indptr[i]; - end = comm->recv_data->indptr[i+1]; - for (col = start; col < end; col++) - { - col_start = send_mat->idx1[col]; - col_end = send_mat->idx1[col+1]; - col_size = col_end - col_start; - RAPtor_MPI_Pack(&col_size, 1, RAPtor_MPI_INT, send_buffer.data(), bytes, &ctr, comm->mpi_comm); - for (int k = col_start; k < col_end; k++) - { - RAPtor_MPI_Pack(&(local_row_map[send_mat->idx2[k]]), 1, RAPtor_MPI_INT, - send_buffer.data(), bytes, &ctr, comm->mpi_comm); - } - RAPtor_MPI_Pack(&(send_mat->vals[col_start]), col_end - col_start, RAPtor_MPI_DOUBLE, - send_buffer.data(), bytes, &ctr, comm->mpi_comm); - } - - RAPtor_MPI_Isend(&(send_buffer[prev_ctr]), ctr - prev_ctr, RAPtor_MPI_PACKED, proc, - comm->key, comm->mpi_comm, &(comm->recv_data->requests[i])); - prev_ctr = ctr; - } - - col_count = 0; - recv_mat->idx1[0] = 0; - recv_mat->nnz = 0; - for (int i = 0; i < comm->send_data->num_msgs; i++) - { - 
proc = comm->send_data->procs[i]; - start = comm->send_data->indptr[i]; - end = comm->send_data->indptr[i+1]; - size = end - start; - RAPtor_MPI_Probe(proc, comm->key, comm->mpi_comm, &recv_status); - RAPtor_MPI_Get_count(&recv_status, RAPtor_MPI_PACKED, &count); - if (count > (int)recv_buffer.size()) - { - recv_buffer.resize(count); - } - RAPtor_MPI_Recv(&(recv_buffer[0]), count, RAPtor_MPI_PACKED, proc, - comm->key, comm->mpi_comm, &recv_status); - ctr = 0; - for (int j = 0; j < size; j++) - { - RAPtor_MPI_Unpack(recv_buffer.data(), count, &ctr, &col_size, 1, RAPtor_MPI_INT, comm->mpi_comm); - recv_mat->idx2.resize(recv_mat->nnz + col_size); - recv_mat->vals.resize(recv_mat->nnz + col_size); - RAPtor_MPI_Unpack(recv_buffer.data(), count, &ctr, &(recv_mat->idx2[recv_mat->nnz]), col_size, - RAPtor_MPI_INT, comm->mpi_comm); - RAPtor_MPI_Unpack(recv_buffer.data(), count, &ctr, &(recv_mat->vals[recv_mat->nnz]), col_size, - RAPtor_MPI_DOUBLE, comm->mpi_comm); - recv_mat->nnz += col_size; - recv_mat->idx1[col_count+1] = recv_mat->nnz; - col_count++; - } - } - recv_mat->nnz = recv_mat->idx2.size(); - RAPtor_MPI_Waitall(comm->recv_data->num_msgs, comm->recv_data->requests.data(), RAPtor_MPI_STATUSES_IGNORE); - - off_proc_T = new CSRMatrix(on_proc_num_cols, -1); - std::vector off_T_sizes(on_proc_num_cols, 0); - for (int i = 0; i < comm->send_data->size_msgs; i++) - { - row = comm->send_data->indices[i]; - start = recv_mat->idx1[i]; - end = recv_mat->idx1[i+1]; - off_T_sizes[row] += (end - start); - } - off_proc_T->idx1[0] = 0; - for (int i = 0; i < off_proc_T->n_rows; i++) - { - off_proc_T->idx1[i+1] = off_proc_T->idx1[i] + off_T_sizes[i]; - off_T_sizes[i] = 0; - } - off_proc_T->nnz = off_proc_T->idx1[off_proc_T->n_rows]; - off_proc_T->idx2.resize(off_proc_T->nnz); - off_proc_T->vals.resize(off_proc_T->nnz); - for (int i = 0; i < comm->send_data->size_msgs; i++) - { - row = comm->send_data->indices[i]; - start = recv_mat->idx1[i]; - end = recv_mat->idx1[i+1]; - for (int j 
= start; j < end; j++) - { - idx = off_proc_T->idx1[row] + off_T_sizes[row]++; - off_proc_T->idx2[idx] = recv_mat->idx2[j]; - off_proc_T->vals[idx] = recv_mat->vals[j]; - } - } - - T = new ParCSRMatrix(part_T, on_proc_T, off_proc_T); - - delete send_mat; - delete recv_mat; - - return T; -} - -ParCOOMatrix* ParCOOMatrix::transpose() -{ - ParCSRMatrix* A_csr = to_ParCSR(); - ParCSRMatrix* AT_csr = A_csr->transpose(); - delete A_csr; - - ParCOOMatrix* AT = AT_csr->to_ParCOO(); - delete AT_csr; - - return AT; -} -ParCSCMatrix* ParCSCMatrix::transpose() -{ - // TODO -- Shouldn't have to convert first - ParCSRMatrix* A_csr = to_ParCSR(); - ParCSRMatrix* AT_csr = A_csr->transpose(); - delete A_csr; - - ParCSCMatrix* AT = AT_csr->to_ParCSC(); - delete AT_csr; - - return AT; -} - -// Assumes block_row_size and block_col_size evenly divide local row/col sizes -ParBSRMatrix* ParCSRMatrix::to_ParBSR(const int block_row_size, const int block_col_size) -{ - int start, end, col; - int prev_row, prev_col; - int block_row, block_col; - int block_pos, col_pos; - int global_col, pos; - double val; - - int global_block_rows = global_num_rows / block_row_size; - int global_block_cols = global_num_cols / block_col_size; - ParBSRMatrix* A = new ParBSRMatrix(global_block_rows, global_block_cols, - block_row_size, block_col_size); - - // Get local to global mappings for block matrix - prev_row = -1; - for (std::vector::iterator it = local_row_map.begin(); - it != local_row_map.end(); ++it) - { - block_row = *it / block_row_size; - if (block_row != prev_row) - { - A->local_row_map.emplace_back(block_row); - prev_row = block_row; - } - } - if (global_num_rows == global_num_cols) - { - A->on_proc_column_map = A->get_local_row_map(); - } - else - { - prev_col = -1; - for (std::vector::iterator it = on_proc_column_map.begin(); - it != on_proc_column_map.end(); ++it) - { - block_col = *it / block_col_size; - if (block_col != prev_col) - { - A->on_proc_column_map.emplace_back(block_row); - 
prev_col = block_row; - } - } - } - - prev_col = -1; - std::map global_to_block_local; - for (std::vector::iterator it = off_proc_column_map.begin(); - it != off_proc_column_map.end(); ++it) - { - block_col = *it / block_col_size; - if (block_col != prev_col) - { - global_to_block_local[block_col] = A->off_proc_column_map.size(); - A->off_proc_column_map.emplace_back(block_col); - prev_col = block_col; - } - } - A->local_num_rows = A->local_row_map.size(); - A->on_proc_num_cols = A->local_num_rows; - A->off_proc_num_cols = A->off_proc_column_map.size(); - A->off_proc->n_cols = A->off_proc_num_cols; - - BSRMatrix* A_on_proc = (BSRMatrix*) A->on_proc; - BSRMatrix* A_off_proc = (BSRMatrix*) A->off_proc; - - A_on_proc->idx1[0] = 0; - A_off_proc->idx1[0] = 0; - for (int i = 0; i < local_num_rows; i += block_row_size) - { - std::vector on_proc_pos(A->on_proc_num_cols, -1); - std::vector off_proc_pos(A->off_proc_num_cols, -1); - for (int row_pos = 0; row_pos < block_row_size; row_pos++) - { - start = on_proc->idx1[i+row_pos]; - end = on_proc->idx1[i+row_pos+1]; - for (int k = start; k < end; k++) - { - col = on_proc->idx2[k]; - block_col = col / block_col_size; - if (on_proc_pos[block_col] == -1) - { - on_proc_pos[block_col] = A_on_proc->idx2.size(); - A_on_proc->idx2.emplace_back(block_col); - A_on_proc->block_vals.emplace_back( - new double[A_on_proc->b_size]()); - } - val = on_proc->vals[k]; - pos = on_proc_pos[block_col]; - col_pos = col % block_col_size; - block_pos = row_pos * block_col_size + col_pos; - A_on_proc->block_vals[pos][block_pos] = val; - } - - start = off_proc->idx1[i+row_pos]; - end = off_proc->idx1[i+row_pos+1]; - for (int k = start; k < end; k++) - { - col = off_proc->idx2[k]; - global_col = off_proc_column_map[col]; - block_col = global_to_block_local[global_col / block_col_size]; - if (off_proc_pos[block_col] == -1) - { - off_proc_pos[block_col] = A_off_proc->idx2.size(); - A_off_proc->idx2.emplace_back(block_col); - 
A_off_proc->block_vals.emplace_back( - new double[A_off_proc->b_size]()); - } - val = off_proc->vals[k]; - pos = off_proc_pos[block_col]; - col_pos = global_col % block_col_size; - block_pos = row_pos * block_col_size + col_pos; - A_off_proc->block_vals[pos][block_pos] = val; - } - } - A_on_proc->idx1[i/block_row_size + 1] = A_on_proc->idx2.size(); - A_off_proc->idx1[i/block_row_size + 1] = A_off_proc->idx2.size(); - } - A_on_proc->nnz = A_on_proc->idx2.size(); - A_off_proc->nnz = A_off_proc->idx2.size(); - - A->comm = new ParComm(A->partition, A->off_proc_column_map); - - return A; -} - -void ParMatrix::init_tap_communicators(RAPtor_MPI_Comm mpi_comm) -{ - /********************************* - * Initialize - * *******************************/ - // Get RAPtor_MPI Information - int rank, num_procs; - RAPtor_MPI_Comm_rank(mpi_comm, &rank); - RAPtor_MPI_Comm_size(mpi_comm, &num_procs); - - // Initialize standard tap_comm - tap_comm = new TAPComm(partition, true); - - // Initialize Variables - std::vector off_proc_col_to_proc; - std::vector on_node_column_map; - std::vector on_node_col_to_proc; - std::vector off_node_column_map; - std::vector off_node_col_to_proc; - std::vector on_node_to_off_proc; - std::vector off_node_to_off_proc; - std::vector recv_nodes; - std::vector orig_procs; - std::vector node_to_local_proc; - std::vector on_proc_to_new; - int on_proc_nc = on_proc_column_map.size(); - if (partition->local_num_cols) - { - on_proc_to_new.resize(partition->local_num_cols); - for (int i = 0; i < on_proc_nc; i++) - { - on_proc_to_new[on_proc_column_map[i] - partition->first_local_col] = i; - } - } - - /********************************* - * Split columns by processes, - * on-node, and off-node - * *******************************/ - // Find process on which vector value associated with each column is - // stored - partition->form_col_to_proc(off_proc_column_map, off_proc_col_to_proc); - - // Partition off_proc cols into on_node and off_node - 
tap_comm->split_off_proc_cols(off_proc_column_map, off_proc_col_to_proc, - on_node_column_map, on_node_col_to_proc, on_node_to_off_proc, - off_node_column_map, off_node_col_to_proc, off_node_to_off_proc); - - // Form local_L_par_comm: fully local communication (origin and - // destination processes both local to node) - tap_comm->form_local_L_par_comm(on_node_column_map, on_node_col_to_proc, - partition->first_local_col); - for (std::vector::iterator it = tap_comm->local_L_par_comm->send_data->indices.begin(); - it != tap_comm->local_L_par_comm->send_data->indices.end(); ++it) - { - *it = on_proc_to_new[*it]; - } - - - /********************************* - * Form standard 3-step - * node-aware communicator - * *******************************/ - // Gather all nodes with which any local process must communication - tap_comm->form_local_R_par_comm(off_node_column_map, off_node_col_to_proc, - orig_procs); - - // Find global processes with which rank communications - tap_comm->form_global_par_comm(orig_procs); - - // Form local_S_par_comm: initial distribution of values among local - // processes, before inter-node communication - tap_comm->form_local_S_par_comm(orig_procs); - - // Adjust send indices (currently global vector indices) to be index - // of global vector value from previous recv - tap_comm->adjust_send_indices(partition->first_local_col); - - - tap_comm->update_recv(on_node_to_off_proc, off_node_to_off_proc); - for (std::vector::iterator it = tap_comm->local_S_par_comm->send_data->indices.begin(); - it != tap_comm->local_S_par_comm->send_data->indices.end(); ++it) - { - *it = on_proc_to_new[*it]; - } - - - /********************************* - * Form simple 2-step - * node-aware communicator - * *******************************/ - // Create simple (2-step) TAPComm for matrix communication - // Copy local_L_par_comm from 3-step tap_comm - tap_mat_comm = new TAPComm(partition, false, tap_comm->local_L_par_comm); - - // Form local recv communicator. 
Will recv from local rank - // corresponding to global rank on which data originates. E.g. if - // data is on rank r = (p, n), and my rank is s = (q, m), I will - // recv data from (p, m). - tap_mat_comm->form_simple_R_par_comm(off_node_column_map, off_node_col_to_proc); - - // Form global par comm.. Will recv from proc on which data - // originates - tap_mat_comm->form_simple_global_comm(off_node_col_to_proc); - - // Adjust send indices (currently global vector indices) to be - // index of global vector value from previous recv (only updating - // local_R to match position in global) - tap_mat_comm->adjust_send_indices(partition->first_local_col); - - tap_mat_comm->update_recv(on_node_to_off_proc, off_node_to_off_proc, false); - - for (std::vector::iterator it = - tap_mat_comm->global_par_comm->send_data->indices.begin(); - it != tap_mat_comm->global_par_comm->send_data->indices.end(); ++it) - { - *it = on_proc_to_new[*it]; - } -} diff --git a/raptor/core/par_matrix.hpp b/raptor/core/par_matrix.hpp deleted file mode 100644 index 20326145..00000000 --- a/raptor/core/par_matrix.hpp +++ /dev/null @@ -1,852 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#ifndef PARMATRIX_HPP -#define PARMATRIX_HPP - -#include -#include -#include - -#include "matrix.hpp" -#include "par_vector.hpp" -#include "comm_pkg.hpp" -#include "mpi_types.hpp" -#include "partition.hpp" - -// Making Par Matrix an abstract Class -/************************************************************** - ***** ParMatrix Class - ************************************************************** - ***** This class constructs a parallel matrix object, holding - ***** a local diagonal matrix, a local off-diagonal block matrix, - ***** and communication information - ***** - ***** Attributes - ***** ------------- - ***** global_num_rows : index_t - ***** Number of rows in the global parallel matrix - ***** global_num_cols : index_t - 
***** Number of columns in the parallel matrix - ***** local_nnz : int - ***** Number of nonzeros stored locally - ***** local_num_rows : int - ***** Number of rows stored locally - ***** first_local_row : index_t - ***** Global index of first row local to process - ***** first_local_col : index_t - ***** Global index of first column to fall in local block - ***** diag : Matrix* - ***** Matrix storing local diagonal block - ***** offd : Matrix* - ***** Matrix storing local off-diagonal block - ***** offd_num_cols : index_t - ***** Number of columns in the off-diagonal matrix - ***** offd_column_map : std::vector - ***** Maps local columns of offd Matrix to global - ***** comm : ParComm* - ***** Parallel communicator for matrix - ***** - ***** Methods - ***** ------- - ***** initialize_partition() - ***** Determines which rows are local to process and which - ***** columns fall in local block - ***** add_value() - ***** Adds a value to a given local row and global column. - ***** Determines if this value is in the diagonal or - ***** off-diagonal block. - ***** add_global_value() - ***** Adds a value to a given global row and global column. - ***** Determines if this value is in the diagonal or - ***** off-diagonal block. - ***** finalize() - ***** Finalizes a matrix after values have been added. - ***** Converts the matrices to the appropriate formats and - ***** creates the parallel communicator. 
- **************************************************************/ -namespace raptor -{ - class ParComm; - class TAPComm; - class ParCOOMatrix; - class ParBCOOMatrix; - class ParCSRMatrix; - class ParBSRMatrix; - class ParCSCMatrix; - class ParBSCMatrix; - - class ParMatrix - { - public: - ParMatrix(Partition* part) - { - partition = part; - partition->num_shared++; - - global_num_rows = partition->global_num_rows; - global_num_cols = partition->global_num_cols; - on_proc_num_cols = partition->local_num_cols; - local_num_rows = partition->local_num_rows; - - comm = NULL; - tap_comm = NULL; - tap_mat_comm = NULL; - on_proc = NULL; - off_proc = NULL; - } - - ParMatrix(Partition* part, index_t glob_rows, index_t glob_cols, int local_rows, - int on_proc_cols) - { - partition = part; - partition->num_shared++; - - global_num_rows = glob_rows; - global_num_cols = glob_cols; - on_proc_num_cols = on_proc_cols; - local_num_rows = local_rows; - - comm = NULL; - tap_comm = NULL; - tap_mat_comm = NULL; - on_proc = NULL; - off_proc = NULL; - } - - ParMatrix(index_t glob_rows, index_t glob_cols) - { - partition = new Partition(glob_rows, glob_cols); - - global_num_rows = partition->global_num_rows; - global_num_cols = partition->global_num_cols; - on_proc_num_cols = partition->local_num_cols; - local_num_rows = partition->local_num_rows; - - comm = NULL; - tap_comm = NULL; - tap_mat_comm = NULL; - on_proc = NULL; - off_proc = NULL; - } - - ParMatrix(index_t glob_rows, - index_t glob_cols, - int local_rows, - int local_cols, - index_t first_row, - index_t first_col, - Topology* topology = NULL) - { - partition = new Partition(glob_rows, glob_cols, - local_rows, local_cols, first_row, first_col, topology); - - global_num_rows = partition->global_num_rows; - global_num_cols = partition->global_num_cols; - on_proc_num_cols = partition->local_num_cols; - local_num_rows = partition->local_num_rows; - - comm = NULL; - tap_comm = NULL; - tap_mat_comm = NULL; - on_proc = NULL; - off_proc 
= NULL; - } - - ParMatrix() - { - local_num_rows = 0; - global_num_rows = 0; - global_num_cols = 0; - off_proc_num_cols = 0; - on_proc_num_cols = 0; - - comm = NULL; - tap_comm = NULL; - tap_mat_comm = NULL; - - on_proc = NULL; - off_proc = NULL; - - partition = NULL; - } - - virtual ~ParMatrix() - { - delete off_proc; - delete on_proc; - - if (comm) comm->delete_comm(); - if (tap_comm) tap_comm->delete_comm(); - if (tap_mat_comm) tap_mat_comm->delete_comm(); - - if (partition) - { - if (partition->num_shared) - { - partition->num_shared--; - } - else - { - delete partition; - } - } - } - - /************************************************************** - ***** ParMatrix Add Value - ************************************************************** - ***** Adds a value to the local portion of the parallel matrix, - ***** determining whether it should be added to diagonal or - ***** off-diagonal block. - ***** - ***** Parameters - ***** ------------- - ***** local_row : index_t - ***** Local row of value - ***** global_col : index_t - ***** Global column of value - ***** value : data_t - ***** Value to be added to parallel matrix - **************************************************************/ - void add_value(index_t row, index_t global_col, data_t value); - - /************************************************************** - ***** ParMatrix Add Global Value - ************************************************************** - ***** Adds a value to the local portion of the parallel matrix, - ***** determining whether it should be added to diagonal or - ***** off-diagonal block. 
- ***** - ***** Parameters - ***** ------------- - ***** global_row : index_t - ***** Global row of value - ***** global_col : index_t - ***** Global column of value - ***** value : data_t - ***** Value to be added to parallel matrix - **************************************************************/ - void add_global_value(int row, int global_col, double value); - - /************************************************************** - ***** ParMatrix Finalize - ************************************************************** - ***** Finalizes the diagonal and off-diagonal matrices. Sorts - ***** the local_to_global indices, and creates the parallel - ***** communicator - **************************************************************/ - void finalize(bool create_comm = true); //b_cols added for BSR - - int* map_partition_to_local(); - void condense_off_proc(); - - void residual(ParVector& x, ParVector& b, ParVector& r, bool tap = false); - void tap_residual(ParVector& x, ParVector& b, ParVector& r); - void mult(ParVector& x, ParVector& b, bool tap = false); - void tap_mult(ParVector& x, ParVector& b); - void mult_append(ParVector& x, ParVector& b, bool tap = false); - void tap_mult_append(ParVector& x, ParVector& b); - void mult_T(ParVector& x, ParVector& b, bool tap = false); - void tap_mult_T(ParVector& x, ParVector& b); - ParMatrix* mult(ParCSRMatrix* B, bool tap = false); - ParMatrix* tap_mult(ParCSRMatrix* B); - ParMatrix* mult_T(ParCSCMatrix* B, bool tap = false); - ParMatrix* mult_T(ParCSRMatrix* B, bool tap = false); - ParMatrix* tap_mult_T(ParCSCMatrix* B); - ParMatrix* tap_mult_T(ParCSRMatrix* B); - ParMatrix* add(ParCSRMatrix* A); - ParMatrix* subtract(ParCSRMatrix* A); - - void init_tap_communicators(RAPtor_MPI_Comm comm = RAPtor_MPI_COMM_WORLD); - void update_tap_comm(ParMatrix* old, const std::vector& old_to_new) - { - tap_comm = new TAPComm((TAPComm*) old->tap_comm, old_to_new, NULL); - tap_mat_comm = new TAPComm((TAPComm*) old->tap_mat_comm, old_to_new, - 
tap_comm->local_L_par_comm); - } - void update_tap_comm(ParMatrix* old, const std::vector& on_old_to_new, - const std::vector& off_old_to_new) - { - tap_comm = new TAPComm((TAPComm*) old->tap_comm, on_old_to_new, off_old_to_new, - NULL); - tap_mat_comm = new TAPComm((TAPComm*) old->tap_mat_comm, on_old_to_new, - off_old_to_new, tap_comm->local_L_par_comm); - } - - - - void sort() - { - on_proc->sort(); - off_proc->sort(); - } - - virtual ParMatrix* transpose() = 0; - - std::vector& get_off_proc_column_map() - { - return off_proc_column_map; - } - - std::vector& get_on_proc_column_map() - { - return on_proc_column_map; - } - - std::vector& get_local_row_map() - { - return local_row_map; - } - - virtual ParCOOMatrix* to_ParCOO() = 0; - virtual ParCSRMatrix* to_ParCSR() = 0; - virtual ParCSCMatrix* to_ParCSC() = 0; - virtual ParCOOMatrix* to_ParBCOO() = 0; - virtual ParCSRMatrix* to_ParBSR() = 0; - virtual ParCSCMatrix* to_ParBSC() = 0; - virtual ParMatrix* copy() = 0; - virtual void copy_helper(ParCSRMatrix* A); - virtual void copy_helper(ParCSCMatrix* A); - virtual void copy_helper(ParCOOMatrix* A); - void default_copy_helper(ParMatrix* A); - - // Store dimensions of parallel matrix - int local_nnz; - int local_num_rows; - int global_num_rows; - int global_num_cols; - int off_proc_num_cols; - int on_proc_num_cols; - - // Store two matrices: on_proc containing columns - // corresponding to vector values stored on_process - // and off_proc columns correspond to vector values - // stored off process (on other processes) - Matrix* on_proc; - Matrix* off_proc; - - // Store information about columns of off_proc - // It will be condensed to only store columns with - // nonzeros, and these must be mapped to - // global column indices - std::vector off_proc_column_map; // Maps off_proc local to global - std::vector on_proc_column_map; // Maps on_proc local to global - std::vector local_row_map; // Maps local rows to global - - // Parallel communication package indicating 
which - // processes hold vector values associated with off_proc, - // and which processes need vector values from this proc - Partition* partition; - ParComm* comm; - TAPComm* tap_comm; - TAPComm* tap_mat_comm; - }; - - class ParCOOMatrix : public ParMatrix - { - public: - ParCOOMatrix(bool form_mat = true) : ParMatrix() - { - if (form_mat) - { - on_proc = new COOMatrix(0, 0, 0); - off_proc = new COOMatrix(0, 0, 0); - } - } - - ParCOOMatrix(index_t glob_rows, - index_t glob_cols, - int nnz_per_row = 5, bool form_mat = true) - : ParMatrix(glob_rows, glob_cols) - { - if (form_mat) - { - on_proc = new COOMatrix(partition->local_num_rows, partition->local_num_cols, - nnz_per_row); - off_proc = new COOMatrix(partition->local_num_rows, partition->global_num_cols, - nnz_per_row); - } - } - - ParCOOMatrix(index_t glob_rows, index_t glob_cols, int local_rows, - int local_cols, index_t first_row, index_t first_col, - int nnz_per_row = 5, bool form_mat = true) - : ParMatrix(glob_rows, glob_cols, - local_rows, local_cols, first_row, first_col) - { - if (form_mat) - { - on_proc = new COOMatrix(partition->local_num_rows, partition->local_num_cols, - nnz_per_row); - off_proc = new COOMatrix(partition->local_num_rows, partition->global_num_cols, - nnz_per_row); - } - } - - ParCOOMatrix(Partition* part, - int nnz_per_row = 5, bool form_mat = true) : ParMatrix(part) - { - if (form_mat) - { - on_proc = new COOMatrix(partition->local_num_rows, partition->local_num_cols, - nnz_per_row); - off_proc = new COOMatrix(partition->local_num_rows, partition->global_num_cols, - nnz_per_row); - } - } - - ParCOOMatrix* to_ParCOO(); - ParCSRMatrix* to_ParCSR(); - ParCSCMatrix* to_ParCSC(); - ParCOOMatrix* to_ParBCOO(); - ParCSRMatrix* to_ParBSR(); - ParCSCMatrix* to_ParBSC(); - - ParCOOMatrix* copy() - { - ParCOOMatrix* A = new ParCOOMatrix(); - A->copy_helper(this); - return A; - } - void copy_helper(ParCSRMatrix* A); - void copy_helper(ParCSCMatrix* A); - void copy_helper(ParCOOMatrix* A); - - 
void mult(ParVector& x, ParVector& b, bool tap = false); - void tap_mult(ParVector& x, ParVector& b); - void mult_T(ParVector& x, ParVector& b, bool tap = false); - void tap_mult_T(ParVector& x, ParVector& b); - - ParCOOMatrix* transpose(); - }; - - - class ParBCOOMatrix : public ParCOOMatrix - { - public: - ParBCOOMatrix() : ParCOOMatrix(false) - { - on_proc = new BCOOMatrix(0, 0, 1, 1, 0); - off_proc = new BCOOMatrix(0, 0, 1, 1, 0); - } - - ParBCOOMatrix(int global_block_rows, int global_block_cols, - int block_row_size, int block_col_size, int nnz_per_row) - : ParCOOMatrix(global_block_rows, global_block_cols, nnz_per_row, false) - { - on_proc = new BCOOMatrix(partition->local_num_rows, partition->local_num_cols, - block_row_size, block_col_size, nnz_per_row); - off_proc = new BCOOMatrix(partition->local_num_rows, partition->global_num_cols, - block_row_size, block_col_size, nnz_per_row); - } - - ParBCOOMatrix(int global_block_rows, int global_block_cols, - int local_block_rows, int local_block_cols, - int first_block_row, int first_block_col, - int block_row_size, int block_col_size, int nnz_per_row = 5) - : ParCOOMatrix(global_block_rows, global_block_cols, - local_block_rows, local_block_cols, first_block_row, - first_block_col, nnz_per_row, false) - { - on_proc = new BCOOMatrix(partition->local_num_rows, partition->local_num_cols, - block_row_size, block_col_size, nnz_per_row); - off_proc = new BCOOMatrix(partition->local_num_rows, partition->global_num_cols, - block_row_size, block_col_size, nnz_per_row); - } - - ParBCOOMatrix(Partition* part, int block_row_size, int block_col_size, - int nnz_per_row = 5) : ParCOOMatrix(part, nnz_per_row, false) - { - on_proc = new BCOOMatrix(partition->local_num_rows, partition->local_num_cols, - block_row_size, block_col_size, nnz_per_row); - off_proc = new BCOOMatrix(partition->local_num_rows, partition->global_num_cols, - block_row_size, block_col_size, nnz_per_row); - } - - ParCOOMatrix* to_ParCOO(); - ParCSRMatrix* 
to_ParCSR(); - ParCSCMatrix* to_ParCSC(); - ParCOOMatrix* to_ParBCOO(); - ParCSRMatrix* to_ParBSR(); - ParCSCMatrix* to_ParBSC(); - - ParCOOMatrix* copy() - { - ParCOOMatrix* A = new ParCOOMatrix(); - A->copy_helper(this); - return A; - } - }; - - class ParCSRMatrix : public ParMatrix - { - public: - ParCSRMatrix(bool form_mat = true) : ParMatrix() - { - if (form_mat) - { - on_proc = new CSRMatrix(0, 0, 0); - off_proc = new CSRMatrix(0, 0, 0); - } - } - - ParCSRMatrix(index_t glob_rows, index_t glob_cols, int nnz = 0, - bool form_mat = true) : ParMatrix(glob_rows, glob_cols) - { - if (form_mat) - { - on_proc = new CSRMatrix(partition->local_num_rows, partition->local_num_cols, - nnz); - off_proc = new CSRMatrix(partition->local_num_rows, partition->global_num_cols, - nnz); - } - } - - ParCSRMatrix(index_t glob_rows, index_t glob_cols, int local_rows, - int local_cols, index_t first_row, index_t first_col, Topology* topology = NULL, - int nnz = 0, bool form_mat = true) : ParMatrix(glob_rows, glob_cols, - local_rows, local_cols, first_row, first_col, topology) - { - if (form_mat) - { - on_proc = new CSRMatrix(partition->local_num_rows, partition->local_num_cols, - nnz); - off_proc = new CSRMatrix(partition->local_num_rows, partition->global_num_cols, - nnz); - } - } - - ParCSRMatrix(Partition* part, - int nnz = 0, bool form_mat = true) : ParMatrix(part) - { - if (form_mat) - { - on_proc = new CSRMatrix(partition->local_num_rows, partition->local_num_cols, - nnz); - off_proc = new CSRMatrix(partition->local_num_rows, partition->global_num_cols, - nnz); - } - } - - ParCSRMatrix(Partition* part, Matrix* _on_proc, Matrix* _off_proc) : ParMatrix(part) - { - on_proc = _on_proc; - off_proc = _off_proc; - on_proc_num_cols = on_proc->n_cols; - off_proc_num_cols = off_proc->n_cols; - local_num_rows = on_proc->n_rows; - finalize(); - } - - - ParCSRMatrix(Partition* part, index_t glob_rows, index_t glob_cols, - int local_rows, int on_proc_cols, int off_proc_cols, int nnz = 0, - 
bool form_mat = true) : ParMatrix(part, glob_rows, glob_cols, - local_rows, on_proc_cols) - { - off_proc_num_cols = off_proc_cols; - if (form_mat) - { - on_proc = new CSRMatrix(local_num_rows, on_proc_cols, nnz); - off_proc = new CSRMatrix(local_num_rows, off_proc_num_cols, nnz); - } - } - - ParCOOMatrix* to_ParCOO(); - ParCSRMatrix* to_ParCSR(); - ParCSCMatrix* to_ParCSC(); - ParCOOMatrix* to_ParBCOO(); - ParCSRMatrix* to_ParBSR(); - ParCSCMatrix* to_ParBSC(); - - ParCSRMatrix* copy() - { - ParCSRMatrix* A = new ParCSRMatrix(); - A->copy_helper(this); - return A; - } - - void copy_structure(ParBSRMatrix* A); - - ParBSRMatrix* to_ParBSR(const int block_row_size, const int block_col_size); - - void copy_helper(ParCSRMatrix* A); - void copy_helper(ParCSCMatrix* A); - void copy_helper(ParCOOMatrix* A); - - ParCSRMatrix* strength(strength_t strength_type, double theta = 0.0, - bool tap_amg = false, int num_variables = 1, int* variables = NULL); - ParCSRMatrix* aggregate(); - ParCSRMatrix* fit_candidates(double* B, double* R, int num_candidates, - double tol = 1e-10); - int maximal_independent_set(std::vector& local_states, - std::vector& off_proc_states, int max_iters = -1); - - void mult(ParVector& x, ParVector& b, bool tap = false); - void tap_mult(ParVector& x, ParVector& b); - void mult_T(ParVector& x, ParVector& b, bool tap = false); - void tap_mult_T(ParVector& x, ParVector& b); - ParCSRMatrix* mult(ParCSRMatrix* B, bool tap = false); - ParCSRMatrix* tap_mult(ParCSRMatrix* B); - ParCSRMatrix* mult_T(ParCSCMatrix* A, bool tap = false); - ParCSRMatrix* mult_T(ParCSRMatrix* A, bool tap = false); - ParCSRMatrix* tap_mult_T(ParCSCMatrix* A); - ParCSRMatrix* tap_mult_T(ParCSRMatrix* A); - ParCSRMatrix* add(ParCSRMatrix* A); - ParCSRMatrix* subtract(ParCSRMatrix* B); - - void print_mult(ParCSRMatrix* B); - void print_mult_T(ParCSCMatrix* A); - void print_mult(); - void print_mult_T(); - - void mult_helper(ParCSRMatrix* B, ParCSRMatrix* C, CSRMatrix* recv, - CSRMatrix* 
C_on_on, CSRMatrix* C_on_off); - CSRMatrix* mult_T_partial(ParCSCMatrix* A); - CSRMatrix* mult_T_partial(CSCMatrix* A_off); - void mult_T_combine(ParCSCMatrix* A, ParCSRMatrix* C, CSRMatrix* recv_mat, - CSRMatrix* C_on_on, CSRMatrix* C_off_on); - - ParCSRMatrix* transpose(); - }; - - class ParBSRMatrix : public ParCSRMatrix - { - public: - ParBSRMatrix() : ParCSRMatrix(false) - { - on_proc = new BSRMatrix(0, 0, 1, 1, 0); - off_proc = new BSRMatrix(0, 0, 1, 1, 0); - } - - ParBSRMatrix(int global_block_rows, int global_block_cols, - int block_row_size, int block_col_size, - int nnz = 0) - : ParCSRMatrix(global_block_rows, global_block_cols, nnz, false) - { - on_proc = new BSRMatrix(partition->local_num_rows, partition->local_num_cols, - block_row_size, block_col_size, nnz); - off_proc = new BSRMatrix(partition->local_num_rows, partition->global_num_cols, - block_row_size, block_col_size, nnz); - } - - ParBSRMatrix(int global_block_rows, int global_block_cols, - int local_block_rows, int local_block_cols, - int first_block_row, int first_block_col, - int block_row_size, int block_col_size, - Topology* topology = NULL, int nnz = 0) - : ParCSRMatrix(global_block_rows, global_block_cols, - local_block_rows, local_block_cols, - first_block_row, first_block_col, topology, - nnz, false) - { - on_proc = new BSRMatrix(partition->local_num_rows, partition->local_num_cols, - block_row_size, block_col_size, nnz); - off_proc = new BSRMatrix(partition->local_num_rows, partition->global_num_cols, - block_row_size, block_col_size, nnz); - } - - ParBSRMatrix(Partition* part, int block_row_size, int block_col_size, - int nnz = 0) : ParCSRMatrix(part, nnz, false) - { - on_proc = new BSRMatrix(partition->local_num_rows, partition->local_num_cols, - block_row_size, block_col_size, nnz); - off_proc = new BSRMatrix(partition->local_num_rows, partition->global_num_cols, - block_row_size, block_col_size, nnz); - } - - ParBSRMatrix(Partition* part, BSRMatrix* _on_proc, BSRMatrix* _off_proc) - 
: ParCSRMatrix(part) - { - on_proc = _on_proc; - off_proc = _off_proc; - on_proc_num_cols = on_proc->n_cols; - off_proc_num_cols = off_proc->n_cols; - local_num_rows = on_proc->n_rows; - finalize(); - } - - ParBSRMatrix(Partition* part, int global_block_rows, int global_block_cols, - int local_block_rows, int on_proc_block_cols, int off_proc_block_cols, - int block_row_size, int block_col_size, int nnz = 0) - : ParCSRMatrix(part, global_block_rows, global_block_cols, - local_block_rows, on_proc_block_cols, off_proc_block_cols, - nnz, false) - { - off_proc_num_cols = off_proc_block_cols; - on_proc = new BSRMatrix(local_block_rows, on_proc_block_cols, - block_row_size, block_col_size, nnz); - off_proc = new BSRMatrix(local_block_rows, off_proc_num_cols, - block_row_size, block_col_size, nnz); - } - - ParCOOMatrix* to_ParCOO(); - ParCSRMatrix* to_ParCSR(); - ParCSCMatrix* to_ParCSC(); - ParCOOMatrix* to_ParBCOO(); - ParCSRMatrix* to_ParBSR(); - ParCSCMatrix* to_ParBSC(); - - ParBSRMatrix* copy() - { - ParBSRMatrix* A = new ParBSRMatrix(); - A->copy_helper(this); - return A; - } - - }; - - - - class ParCSCMatrix : public ParMatrix - { - public: - ParCSCMatrix(bool form_mat = true) : ParMatrix() - { - if (form_mat) - { - on_proc = new CSCMatrix(0, 0, 0); - off_proc = new CSCMatrix(0, 0, 0); - } - } - - ParCSCMatrix(index_t glob_rows, index_t glob_cols, int nnz_per_row = 5, - bool form_mat = true) : ParMatrix(glob_rows, glob_cols) - { - if (form_mat) - { - on_proc = new CSCMatrix(partition->local_num_rows, partition->local_num_cols, - nnz_per_row); - off_proc = new CSCMatrix(partition->local_num_rows, partition->global_num_cols, - nnz_per_row); - } - } - - ParCSCMatrix(index_t glob_rows, index_t glob_cols, int local_n_rows, - int local_n_cols, index_t first_row, index_t first_col, - int nnz_per_row = 5, bool form_mat = true) - : ParMatrix(glob_rows, glob_cols, local_n_rows, local_n_cols, - first_row, first_col) - { - if (form_mat) - { - on_proc = new 
CSCMatrix(partition->local_num_rows, partition->local_num_cols, - nnz_per_row); - off_proc = new CSCMatrix(partition->local_num_rows, partition->global_num_cols, - nnz_per_row); - } - } - - ParCSCMatrix(Partition* part, index_t glob_rows, index_t glob_cols, int local_rows, - int on_proc_cols, int off_proc_cols, int nnz_per_row = 5, bool form_mat = true) - : ParMatrix(part, glob_rows, glob_cols, local_rows, on_proc_cols) - { - off_proc_num_cols = off_proc_cols; - if (form_mat) - { - on_proc = new CSCMatrix(local_num_rows, on_proc_cols, nnz_per_row); - off_proc = new CSCMatrix(local_num_rows, off_proc_num_cols, nnz_per_row); - } - } - - - ParCSCMatrix(Partition* part, int nnz_per_row = 5, bool form_mat = true) - : ParMatrix(part) - { - if (form_mat) - { - on_proc = new CSRMatrix(partition->local_num_rows, partition->local_num_cols, - nnz_per_row); - off_proc = new CSRMatrix(partition->local_num_rows, partition->global_num_cols, - nnz_per_row); - } - } - - ParCOOMatrix* to_ParCOO(); - ParCSRMatrix* to_ParCSR(); - ParCSCMatrix* to_ParCSC(); - ParCOOMatrix* to_ParBCOO(); - ParCSRMatrix* to_ParBSR(); - ParCSCMatrix* to_ParBSC(); - - ParCSCMatrix* copy() - { - ParCSCMatrix* A = new ParCSCMatrix(); - A->copy_helper(this); - return A; - } - - void copy_helper(ParCSRMatrix* A); - void copy_helper(ParCSCMatrix* A); - void copy_helper(ParCOOMatrix* A); - - void mult(ParVector& x, ParVector& b, bool tap); - void tap_mult(ParVector& x, ParVector& b); - void mult_T(ParVector& x, ParVector& b, bool tap); - void tap_mult_T(ParVector& x, ParVector& b); - - ParCSCMatrix* transpose(); - }; - - -class ParBSCMatrix : public ParCSCMatrix - { - public: - ParBSCMatrix() : ParCSCMatrix(false) - { - on_proc = new BSCMatrix(0, 0, 1, 1, 0); - off_proc = new BSCMatrix(0, 0, 1, 1, 0); - } - - ParBSCMatrix(int global_block_rows, int global_block_cols, - int block_row_size, int block_col_size, - int nnz = 0) - : ParCSCMatrix(global_block_rows, global_block_cols, nnz, false) - { - on_proc = new 
BSCMatrix(partition->local_num_rows, partition->local_num_cols, - block_row_size, block_col_size, nnz); - off_proc = new BSCMatrix(partition->local_num_rows, partition->global_num_cols, - block_row_size, block_col_size, nnz); - } - - ParBSCMatrix(Partition* part, int block_row_size, int block_col_size, - int nnz = 0) : ParCSCMatrix(part, nnz, false) - { - on_proc = new BSCMatrix(partition->local_num_rows, partition->local_num_cols, - block_row_size, block_col_size, nnz); - off_proc = new BSCMatrix(partition->local_num_rows, partition->global_num_cols, - block_row_size, block_col_size, nnz); - } - - ParBSCMatrix(Partition* part, int global_block_rows, int global_block_cols, - int local_block_rows, int on_proc_block_cols, int off_proc_block_cols, - int block_row_size, int block_col_size, int nnz = 0) - : ParCSCMatrix(part, global_block_rows, global_block_cols, local_block_rows, - on_proc_block_cols, off_proc_block_cols, nnz, false) - { - off_proc_num_cols = off_proc_block_cols; - on_proc = new BSCMatrix(local_num_rows, on_proc_block_cols, - block_row_size, block_col_size, nnz); - off_proc = new BSCMatrix(local_num_rows, off_proc_num_cols, - block_row_size, block_col_size, nnz); - } - - ParCOOMatrix* to_ParCOO(); - ParCSRMatrix* to_ParCSR(); - ParCSCMatrix* to_ParCSC(); - ParCOOMatrix* to_ParBCOO(); - ParCSRMatrix* to_ParBSR(); - ParCSCMatrix* to_ParBSC(); - - ParBSCMatrix* copy() - { - ParBSCMatrix* A = new ParBSCMatrix(); - A->copy_helper(this); - return A; - } - - }; - - -} -#endif diff --git a/raptor/core/par_vector.cpp b/raptor/core/par_vector.cpp deleted file mode 100644 index 0eb2fcdd..00000000 --- a/raptor/core/par_vector.cpp +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#include "par_vector.hpp" - -using namespace raptor; - - -/************************************************************** -***** Vector AXPY 
-************************************************************** -***** Multiplies the local vector by a constant, alpha, and then -***** sums each element with corresponding entry of Y -***** -***** Parameters -***** ------------- -***** y : ParVector* y -***** Vector to be summed with -***** alpha : data_t -***** Constant value to multiply each element of vector by -**************************************************************/ -void ParVector::axpy(ParVector& x, data_t alpha) -{ - if (local_n) - { - local.axpy(x.local, alpha); - } -} - -/************************************************************** -***** Vector Scale -************************************************************** -***** Multiplies the local vector by a constant, alpha -***** -***** Parameters -***** ------------- -***** alpha : data_t -***** Constant value to multiply each element of vector by -**************************************************************/ -void ParVector::scale(data_t alpha) -{ - if (local_n) - { - local.scale(alpha); - } -} - -/************************************************************** -***** ParVector Set Constant Value -************************************************************** -***** Sets each element of the local vector to a constant value -***** -***** Parameters -***** ------------- -***** alpha : data_t -***** Value to set each element of local vector to -**************************************************************/ -void ParVector::set_const_value(data_t alpha) -{ - if (local_n) - { - local.set_const_value(alpha); - } -} - -/************************************************************** -***** ParVector Set Random Values -************************************************************** -***** Sets each element of the local vector to a random value -**************************************************************/ -void ParVector::set_rand_values() -{ - if (local_n) - { - local.set_rand_values(); - } -} - 
-/************************************************************** -***** Vector Norm -************************************************************** -***** Calculates the P norm of the global vector (for a given P) -***** -***** Parameters -***** ------------- -***** p : index_t -***** Determines which p-norm to calculate -**************************************************************/ -data_t ParVector::norm(index_t p) -{ - data_t result = 0.0; - if (local_n) - { - result = local.norm(p); - result = pow(result, p); // undoing root of p from local operation - } - RAPtor_MPI_Allreduce(RAPtor_MPI_IN_PLACE, &result, 1, RAPtor_MPI_DATA_T, RAPtor_MPI_SUM, RAPtor_MPI_COMM_WORLD); - return pow(result, 1./p); -} - - -data_t ParVector::inner_product(ParVector& x) -{ - data_t inner_prod = 0.0; - - if (local_n != x.local_n) - { - int rank; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - printf("Error. Cannot perform inner product. Dimensions do not match.\n"); - exit(-1); - } - - if (local_n) - { - inner_prod = local.inner_product(x.local); - } - - RAPtor_MPI_Allreduce(RAPtor_MPI_IN_PLACE, &inner_prod, 1, RAPtor_MPI_DATA_T, RAPtor_MPI_SUM, RAPtor_MPI_COMM_WORLD); - - return inner_prod; -} - - diff --git a/raptor/core/par_vector.hpp b/raptor/core/par_vector.hpp deleted file mode 100644 index 4685e16a..00000000 --- a/raptor/core/par_vector.hpp +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#ifndef RAPTOR_CORE_PARVECTOR_HPP -#define RAPTOR_CORE_PARVECTOR_HPP - -#include "assert.h" - -#include -#include - -#include "mpi_types.hpp" -#include "vector.hpp" - -/************************************************************** - ***** ParVector Class - ************************************************************** - ***** This class constructs a parallel vector, containing - ***** values for a local portion - ***** - ***** Attributes - ***** ------------- - ***** local : 
Vector* - ***** Local portion of the parallel vector - ***** global_n : index_t - ***** Number of entries in the global vector - ***** local_n : index_t - ***** Dimension of the local portion of the vector - ***** - ***** Methods - ***** ------- - ***** set_const_value(data_t alpha) - ***** Sets the local vector to a constant value - ***** set_rand_values() - ***** Sets each element of the local vector to a random value - ***** axpy(Vector& y, data_t alpha) - ***** Performs axpy on local portion of vector - ***** scale(data_t alpha) - ***** Multiplies entries of the local vector by a constant - ***** norm(index_t p) - ***** Calculates the p-norm of the global vector - **************************************************************/ -namespace raptor -{ - class ParVector - { - public: - /************************************************************** - ***** ParVector Class Constructor - ************************************************************** - ***** Sets the dimensions of the global vector and initializes - ***** an empty local vector of the given size - ***** - ***** Parameters - ***** ------------- - ***** glbl_n : index_t - ***** Number of entries in global vector - ***** lcl_n : index_t - ***** Number of entries of global vector stored locally - **************************************************************/ - ParVector(index_t glbl_n, int lcl_n) - { - resize(glbl_n, lcl_n); - } - - ParVector(const ParVector& x) - { - copy(x); - } - - /************************************************************** - ***** ParVector Class Constructor - ************************************************************** - ***** Creates an empty ParVector (local_n = 0) - **************************************************************/ - ParVector() - { - local_n = 0; - } - - /************************************************************** - ***** ParVector Class Destructor - ************************************************************** - ***** Deletes the local vector - 
**************************************************************/ - ~ParVector() - { - } - - void resize(index_t glbl_n, int lcl_n) - { - global_n = glbl_n; - local_n = lcl_n; - local.resize(local_n); - } - - void copy(const ParVector& x) - { - global_n = x.global_n; - local_n = x.local_n; - local.copy(x.local); - } - - /************************************************************** - ***** ParVector Set Constant Value - ************************************************************** - ***** Sets each element of the local vector to a constant value - ***** - ***** Parameters - ***** ------------- - ***** alpha : data_t - ***** Value to set each element of local vector to - **************************************************************/ - void set_const_value(data_t alpha); - - /************************************************************** - ***** ParVector Set Random Values - ************************************************************** - ***** Sets each element of the local vector to a random value - **************************************************************/ - void set_rand_values(); - - /************************************************************** - ***** Vector AXPY - ************************************************************** - ***** Multiplies the local vector by a constant, alpha, and then - ***** sums each element with corresponding entry of Y - ***** - ***** Parameters - ***** ------------- - ***** y : ParVector* y - ***** ParVector to be summed with - ***** alpha : data_t - ***** Constant value to multiply each element of vector by - **************************************************************/ - void axpy(ParVector& y, data_t alpha); - - /************************************************************** - ***** Vector Scale - ************************************************************** - ***** Multiplies the local vector by a constant, alpha - ***** - ***** Parameters - ***** ------------- - ***** alpha : data_t - ***** Constant value to 
multiply each element of vector by - **************************************************************/ - void scale(data_t alpha); - - /************************************************************** - ***** Vector Norm - ************************************************************** - ***** Calculates the P norm of the global vector (for a given P) - ***** - ***** Parameters - ***** ------------- - ***** p : index_t - ***** Determines which p-norm to calculate - **************************************************************/ - data_t norm(index_t p); - - data_t inner_product(ParVector& x); - - const data_t& operator[](const int index) const - { - return local.values[index]; - } - - data_t& operator[](const int index) - { - return local.values[index]; - } - - Vector local; - int global_n; - int local_n; - }; - -} -#endif diff --git a/raptor/core/partition.hpp b/raptor/core/partition.hpp deleted file mode 100644 index c1878b9c..00000000 --- a/raptor/core/partition.hpp +++ /dev/null @@ -1,349 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#ifndef PARTITION_HPP -#define PARTITION_HPP - -#include -#include -#include - -#include "types.hpp" -#include "topology.hpp" - -#define STANDARD_PPN 4 -#define STANDARD_PROC_LAYOUT 1 - -/************************************************************** - ***** Partition Class - ************************************************************** - ***** This class holds the partition of a number of vertices - ***** across a number of processes - ***** - ***** Attributes - ***** ------------- - ***** global_num_indices : index_t - ***** Number of rows to be partitioned - ***** first_local_idx : index_t - ***** First global index of a row in partition local to rank - ***** local_num_indices : index_t - ***** Number of rows local to rank's partition - ***** - ***** Methods - ***** --------- - **************************************************************/ 
-namespace raptor -{ - class Partition - { - public: - Partition(index_t _global_num_rows, index_t _global_num_cols, - Topology* _topology = NULL) - { - int rank, num_procs; - int avg_num; - int extra; - - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - RAPtor_MPI_Comm_size(RAPtor_MPI_COMM_WORLD, &num_procs); - - global_num_rows = _global_num_rows; - global_num_cols = _global_num_cols; - - // Partition rows across processes - avg_num = global_num_rows / num_procs; - extra = global_num_rows % num_procs; - first_local_row = avg_num * rank; - local_num_rows = avg_num; - if (extra > rank) - { - first_local_row += rank; - local_num_rows++; - } - else - { - first_local_row += extra; - } - - // Partition cols across processes - if (global_num_rows < num_procs) - { - num_procs = global_num_rows; - } - avg_num = global_num_cols / num_procs; - extra = global_num_cols % num_procs; - if (local_num_rows) - { - first_local_col = avg_num * rank; - local_num_cols = avg_num; - if (extra > rank) - { - first_local_col += rank; - local_num_cols++; - } - else - { - first_local_col += extra; - } - } - else - { - first_local_col = 0; - local_num_cols = 0; - } - - last_local_row = first_local_row + local_num_rows - 1; - last_local_col = first_local_col + local_num_cols - 1; - - num_shared = 0; - - create_assumed_partition(); - - if (_topology == NULL) - { - topology = new Topology(); - } - else - { - topology = _topology; - topology->num_shared++; - } - } - - Partition(index_t _global_num_rows, index_t _global_num_cols, - index_t _brows, index_t _bcols, Topology* _topology = NULL) - { - int rank, num_procs; - int avg_num_blocks, global_num_row_blocks, global_num_col_blocks; - int extra; - - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - RAPtor_MPI_Comm_size(RAPtor_MPI_COMM_WORLD, &num_procs); - - global_num_rows = _global_num_rows; - global_num_cols = _global_num_cols; - - // Partition rows across processes - global_num_row_blocks = global_num_rows / _brows; - avg_num_blocks 
= global_num_row_blocks / num_procs; - extra = global_num_row_blocks % num_procs; - first_local_row = avg_num_blocks * rank * _brows; - local_num_rows = avg_num_blocks * _brows; - if (extra > rank) - { - first_local_row += rank * _brows; - local_num_rows += _brows; - } - else - { - first_local_row += extra * _brows; - } - - // Partition cols across processes - // local_num_cols = number of cols in on_proc matrix - if (global_num_row_blocks < num_procs) - { - num_procs = global_num_row_blocks; - } - - global_num_col_blocks = global_num_cols / _bcols; - avg_num_blocks = global_num_col_blocks / num_procs; - extra = global_num_col_blocks % num_procs; - if (local_num_rows) - { - first_local_col = avg_num_blocks * rank * _bcols; - local_num_cols = avg_num_blocks * _bcols; - if (extra > rank) - { - first_local_col += rank * _bcols; - local_num_cols += _bcols; - } - else - { - first_local_col += extra * _bcols; - } - } - else - { - local_num_cols = 0; - } - - last_local_row = first_local_row + local_num_rows - 1; - last_local_col = first_local_col + local_num_cols - 1; - - num_shared = 0; - - create_assumed_partition(); - - if (_topology == NULL) - { - topology = new Topology(); - } - else - { - topology = _topology; - topology->num_shared++; - } - } - - Partition(index_t _global_num_rows, index_t _global_num_cols, - int _local_num_rows, int _local_num_cols, - index_t _first_local_row, index_t _first_local_col, - Topology* _topology = NULL) - { - global_num_rows = _global_num_rows; - global_num_cols = _global_num_cols; - local_num_rows = _local_num_rows; - local_num_cols = _local_num_cols; - first_local_row = _first_local_row; - first_local_col = _first_local_col; - last_local_row = first_local_row + local_num_rows - 1; - last_local_col = first_local_col + local_num_cols - 1; - - num_shared = 0; - - create_assumed_partition(); - - if (_topology == NULL) - { - topology = new Topology(); - } - else - { - topology = _topology; - topology->num_shared++; - } - } - - 
Partition(Topology* _topology = NULL) - { - if (_topology == NULL) - { - topology = new Topology(); - } - else - { - topology = _topology; - topology->num_shared++; - } - - num_shared = 0; - global_num_rows = 0; - global_num_cols = 0; - local_num_rows = 0; - local_num_cols = 0; - first_local_row = 0; - first_local_col = 0; - last_local_row = 0; - last_local_col = 0; - assumed_num_cols = 0; - } - - Partition(Partition* A, Partition* B) - { - global_num_rows = A->global_num_rows; - global_num_cols = B->global_num_cols; - local_num_rows = A->local_num_rows; - local_num_cols = B->local_num_cols; - first_local_row = A->first_local_row; - first_local_col = B->first_local_col; - last_local_row = A->last_local_row; - last_local_col = B->last_local_col; - - num_shared = 0; - - assumed_num_cols = B->assumed_num_cols; - first_cols.resize(B->first_cols.size()); - std::copy(B->first_cols.begin(), B->first_cols.end(), - first_cols.begin()); - - create_assumed_partition(); - - topology = A->topology; - topology->num_shared++; - } - - Partition* transpose() - { - return new Partition(global_num_cols, global_num_rows, - local_num_cols, local_num_rows, first_local_col, - first_local_row, topology); - } - - ~Partition() - { - if (topology->num_shared) - { - topology->num_shared--; - } - else - { - delete topology; - } - } - - void create_assumed_partition() - { - // Get RAPtor_MPI Information - int rank, num_procs; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - RAPtor_MPI_Comm_size(RAPtor_MPI_COMM_WORLD, &num_procs); - - assumed_num_cols = global_num_cols / num_procs; - if (global_num_cols % num_procs) assumed_num_cols++; - - first_cols.resize(num_procs+1); - RAPtor_MPI_Allgather(&(first_local_col), 1, RAPtor_MPI_INT, first_cols.data(), 1, RAPtor_MPI_INT, - RAPtor_MPI_COMM_WORLD); - first_cols[num_procs] = global_num_cols; - } - - void form_col_to_proc (const std::vector& off_proc_column_map, - std::vector& off_proc_col_to_proc) - { - int rank, num_procs; - 
RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - RAPtor_MPI_Comm_size(RAPtor_MPI_COMM_WORLD, &num_procs); - - int global_col, assumed_proc; - int ctr = 0; - off_proc_col_to_proc.resize(off_proc_column_map.size()); - for (std::vector::const_iterator it = off_proc_column_map.begin(); - it != off_proc_column_map.end(); ++it) - { - global_col = *it; - assumed_proc = global_col / assumed_num_cols; - while (global_col < first_cols[assumed_proc]) - { - assumed_proc--; - } - while (assumed_proc < num_procs - 1 && global_col >= first_cols[assumed_proc+1]) - { - assumed_proc++; - } - off_proc_col_to_proc[ctr++] = assumed_proc; - } - } - - - index_t global_num_rows; - index_t global_num_cols; - int local_num_rows; - int local_num_cols; - index_t first_local_row; - index_t first_local_col; - index_t last_local_row; - index_t last_local_col; - - int assumed_num_cols; - std::vector first_cols; - - Topology* topology; - - int num_shared; // Number of ParMatrix classes using partition - - }; -} -#endif - - - diff --git a/raptor/core/tap_comm.cpp b/raptor/core/tap_comm.cpp deleted file mode 100644 index 3d744908..00000000 --- a/raptor/core/tap_comm.cpp +++ /dev/null @@ -1,1158 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#include "comm_pkg.hpp" - -//#include -//#include - -using namespace raptor; - -/************************************************************** -***** Split Off Proc Cols -************************************************************** -***** Splits off_proc_column_map into on_node_column_map and -***** off_node_column map. 
Also maps each of these columns to -***** their corresponding process, and maps each local index -***** in on_node and off_node to off_proc -***** -***** Parameters -***** ------------- -***** off_proc_column_map : std::vector& -***** Vector holding rank's off_proc_columns -***** off_proc_col_to_proc : std::vector& -***** Vector mapping rank's off_proc_columns to distant procs -***** on_node_column_map : std::vector& -***** Will be returned holding on_node columns -***** on_node_col_to_proc : std::vector& -***** Will be returned holding procs corresponding to on_node cols -***** on_node_to_off_proc : std::vector& -***** Will be returned holding map from on_node to off_proc -***** off_node_column_map : std::vector& -***** Will be returned holding off_node columns -***** off_node_col_to_node : std::vector& -***** Will be returned holding procs corresponding to off_node cols -***** off_node_to_off_proc : std::vector& -***** Will be returned holding map from off_node to off_proc -**************************************************************/ -void TAPComm::split_off_proc_cols(const std::vector& off_proc_column_map, - const std::vector& off_proc_col_to_proc, - std::vector& on_node_column_map, - std::vector& on_node_col_to_proc, - std::vector& on_node_to_off_proc, - std::vector& off_node_column_map, - std::vector& off_node_col_to_proc, - std::vector& off_node_to_off_proc) -{ - int rank, rank_node, num_procs; - int proc; - int node; - int global_col; - int off_proc_num_cols = off_proc_column_map.size(); - - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - RAPtor_MPI_Comm_size(RAPtor_MPI_COMM_WORLD, &num_procs); - rank_node = topology->get_node(rank); - - // Reserve size in vectors - - on_node_column_map.reserve(off_proc_num_cols); - on_node_col_to_proc.reserve(off_proc_num_cols); - off_node_column_map.reserve(off_proc_num_cols); - off_node_col_to_proc.reserve(off_proc_num_cols); - - for (int i = 0; i < off_proc_num_cols; i++) - { - proc = off_proc_col_to_proc[i]; - 
node = topology->get_node(proc); - global_col = off_proc_column_map[i]; - if (node == rank_node) - { - on_node_column_map.emplace_back(global_col); - on_node_col_to_proc.emplace_back(topology->get_local_proc(proc)); - on_node_to_off_proc.emplace_back(i); - } - else - { - off_node_column_map.emplace_back(global_col); - off_node_col_to_proc.emplace_back(proc); - off_node_to_off_proc.emplace_back(i); - } - } -} - - -/************************************************************** -***** Gather off node nodes -************************************************************** -***** Gathers nodes with which any local processes communicates -***** -***** Parameters -***** ------------- -***** off_node_col_to_node : std::vector& -***** Vector holding rank's off_node_columns -***** recv_nodes : std::vector& -***** Returned holding all nodes with which any local -***** process communicates (union of off_node_col_to_node) -**************************************************************/ -void TAPComm::form_local_R_par_comm(const std::vector& off_node_column_map, - const std::vector& off_node_col_to_proc, - std::vector& orig_procs) -{ - int local_rank; - RAPtor_MPI_Comm_rank(topology->local_comm, &local_rank); - - // Declare Variables - int int_size = sizeof(int); - - int node; - int num_recv_nodes; - int local_proc; - int idx, idx_p, proc; - int start_ctr, ctr; - int local_num_sends; - int recv_start, recv_end; - int recv_proc, recv_s; - int count, pos; - int off_node_num_cols = off_node_column_map.size(); - int N = topology->num_nodes / int_size; - if (topology->num_nodes % int_size) - { - N++; - } - std::vector tmp_recv_nodes(N, 0); - std::vector nodal_recv_nodes(N, 0); - std::vector node_sizes(topology->num_nodes, 0); - std::vector nodal_off_node_sizes; - std::vector node_to_local_proc; - std::vector local_recv_procs(topology->PPN, 0); - std::vector local_recv_sizes(topology->PPN, 0); - std::vector local_send_procs(topology->PPN); - std::vector proc_idx; - std::vector 
off_node_col_to_lcl_proc; - std::vector send_buffer; - std::vector recv_nodes; - - RAPtor_MPI_Status recv_status; - - NonContigData* local_R_recv = (NonContigData*) local_R_par_comm->recv_data; - - // Find nodes from which rank must recv, and the size of each recv - for (std::vector::const_iterator it = off_node_col_to_proc.begin(); - it != off_node_col_to_proc.end(); ++it) - { - node = topology->get_node(*it); - idx = node / int_size; - pos = node % int_size; - tmp_recv_nodes[idx] |= 1 << pos; - node_sizes[node]++; - } - - // Allreduce among procs local to node to find nodes from which rank_node - // recvs - RAPtor_MPI_Allreduce(tmp_recv_nodes.data(), nodal_recv_nodes.data(), N, RAPtor_MPI_INT, - RAPtor_MPI_BOR, topology->local_comm); - - // Add nodes from which rank_node must recv to recv_nodes - for (int i = 0; i < N; i++) - { - for (int j = 0; j < int_size; j++) - { - if ((nodal_recv_nodes[i] >> j) & 1) - { - recv_nodes.emplace_back(i*int_size + j); - } - } - } - - // Find the number of nodes from which rank node recvs - num_recv_nodes = recv_nodes.size(); - - // Find the size of each nodal recv - if (num_recv_nodes) - { - // Collect the number of bytes sent to each node - nodal_off_node_sizes.resize(num_recv_nodes); - for (int i = 0; i < num_recv_nodes; i++) - { - node = recv_nodes[i]; - nodal_off_node_sizes[i] = node_sizes[node]; - } - RAPtor_MPI_Allreduce(RAPtor_MPI_IN_PLACE, nodal_off_node_sizes.data(), num_recv_nodes, RAPtor_MPI_INT, - RAPtor_MPI_SUM, topology->local_comm); - - // Sort nodes, descending by msg size (find permutation) - std::vector p(num_recv_nodes); - std::iota(p.begin(), p.end(), 0); - std::sort(p.begin(), p.end(), - [&](const int lhs, const int rhs) - { - return nodal_off_node_sizes[lhs] > nodal_off_node_sizes[rhs]; - }); - - // Sort recv nodes by total num bytes recvd from node - std::vector done(num_recv_nodes); - for (int i = 0; i < num_recv_nodes; i++) - { - if (done[i]) continue; - - done[i] = true; - int prev_j = i; - int j = p[i]; 
- while (i != j) - { - std::swap(recv_nodes[prev_j], recv_nodes[j]); - std::swap(nodal_off_node_sizes[prev_j], nodal_off_node_sizes[j]); - done[j] = true; - prev_j = j; - j = p[j]; - } - } - - // Map recv nodes to local processes - local_proc = 0; - node_to_local_proc.resize(topology->num_nodes); - for (std::vector::iterator it = recv_nodes.begin(); - it != recv_nodes.end(); ++it) - { - node_to_local_proc[*it] = local_proc++ ; - if (local_proc >= topology->PPN) - { - local_proc = 0; - } - } - } - - if (num_recv_nodes) - { - proc_idx.resize(num_recv_nodes, 0); - } - if (off_node_num_cols) - { - off_node_col_to_lcl_proc.resize(off_node_num_cols); - } - - // Find number of recvd indices per local proc - for (int i = 0; i < off_node_num_cols; i++) - { - proc = off_node_col_to_proc[i]; - node = topology->get_node(proc); - local_proc = node_to_local_proc[node]; - local_recv_sizes[local_proc]++; - off_node_col_to_lcl_proc[i] = local_proc; - } - - // Create displs based on local_recv_sizes - recv_s = 0; - std::vector proc_to_idx(topology->PPN); - for (int i = 0; i < topology->PPN; i++) - { - if (local_recv_sizes[i]) - { - recv_s += local_recv_sizes[i]; - proc_to_idx[i] = local_R_recv->procs.size(); - local_R_recv->procs.emplace_back(i); - local_R_recv->indptr.emplace_back(recv_s); - local_recv_sizes[i] = 0; - local_recv_procs[i] = 1; - } - } - // Add columns to local_recv_indices in location according to - local_R_recv->indices.resize(off_node_num_cols); - for (int i = 0; i < off_node_num_cols; i++) - { - local_proc = off_node_col_to_lcl_proc[i]; - idx_p = proc_to_idx[local_proc]; - idx = local_R_recv->indptr[idx_p] + local_recv_sizes[local_proc]++; - local_R_recv->indices[idx] = i; - } - local_R_recv->num_msgs = local_R_recv->procs.size(); - local_R_recv->size_msgs = local_R_recv->indices.size(); - local_R_recv->finalize(); - - // On node communication-- scalable to do all reduce to find number of - // local processes to send to :) - 
RAPtor_MPI_Allreduce(local_recv_procs.data(), local_send_procs.data(), topology->PPN, RAPtor_MPI_INT, - RAPtor_MPI_SUM, topology->local_comm); - local_num_sends = local_send_procs[local_rank]; - - // Send recv_indices to each recv_proc along with their origin - // node - if (local_R_recv->size_msgs) - { - send_buffer.resize(2*local_R_recv->size_msgs); - } - - - ctr = 0; - start_ctr = 0; - for (int i = 0; i < local_R_recv->num_msgs; i++) - { - recv_proc = local_R_recv->procs[i]; - recv_start = local_R_recv->indptr[i]; - recv_end = local_R_recv->indptr[i+1]; - for (int j = recv_start; j < recv_end; j++) - { - idx = local_R_recv->indices[j]; - send_buffer[ctr++] = off_node_column_map[idx]; - } - for (int j = recv_start; j < recv_end; j++) - { - idx = local_R_recv->indices[j]; - send_buffer[ctr++] = off_node_col_to_proc[idx]; - } - RAPtor_MPI_Isend(&(send_buffer[start_ctr]), 2*(recv_end - recv_start), - RAPtor_MPI_INT, recv_proc, 6543, topology->local_comm, - &(local_R_recv->requests[i])); - start_ctr = ctr; - } - - // Recv messages from local processes and add to send_data - ctr = 0; - for (int i = 0; i < local_num_sends; i++) - { - RAPtor_MPI_Probe(RAPtor_MPI_ANY_SOURCE, 6543, topology->local_comm, &recv_status); - RAPtor_MPI_Get_count(&recv_status, RAPtor_MPI_INT, &count); - proc = recv_status.RAPtor_MPI_SOURCE; - int recvbuf[count]; - RAPtor_MPI_Recv(recvbuf, count, RAPtor_MPI_INT, proc, 6543, topology->local_comm, - &recv_status); - local_R_par_comm->send_data->add_msg(proc, count / 2, recvbuf); - start_ctr = count / 2; - // Add orig nodes for each recvd col (need to know this for - // global communication setup) - for (int j = start_ctr; j < count; j++) - { - orig_procs.emplace_back(recvbuf[j]); - } - } - local_R_par_comm->send_data->finalize(); - - // Wait for all sends to complete - if (local_R_recv->num_msgs) - { - RAPtor_MPI_Waitall(local_R_recv->num_msgs, - local_R_recv->requests.data(), - RAPtor_MPI_STATUS_IGNORE); - } - -} - 
-/************************************************************** -***** Find global comm procs -************************************************************** -***** Determine which processes with which rank will communicate -***** during inter-node communication -***** -***** Parameters -***** ------------- -***** recv_nodes : std::vector& -***** All nodes with which any local process communicates -***** send_procs : std::vector& -***** Returns with all off_node processes to which rank sends -***** recv_procs : std::vector& -***** Returns with all off_node process from which rank recvs -**************************************************************/ -void TAPComm::form_global_par_comm(std::vector& orig_procs) -{ - int rank, num_procs; - int local_rank; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - RAPtor_MPI_Comm_rank(topology->local_comm, &local_rank); - RAPtor_MPI_Comm_size(RAPtor_MPI_COMM_WORLD, &num_procs); - - int n_sends; - int proc, node; - int recvbuf; - int n_send_procs; - int recv_s; - int idx, node_idx; - int ctr; - int start, end, size; - int count; - RAPtor_MPI_Status recv_status; - - std::vector node_list(topology->num_nodes, 0); - std::vector sendbuf; - std::vector sendbuf_sizes; - std::vector send_procs; - std::vector send_sizes(topology->PPN); - std::vector send_displs(topology->PPN+1); - std::vector node_sizes(topology->num_nodes, 0); - std::vector send_proc_sizes; - std::vector node_to_idx(topology->num_nodes, 0); - std::vector node_recv_idx_orig_procs; - std::vector send_buffer; - - NonContigData* global_recv = (NonContigData*) global_par_comm->recv_data; - - if (local_R_par_comm->send_data->size_msgs) - { - node_recv_idx_orig_procs.resize(local_R_par_comm->send_data->size_msgs); - } - - // Find how many msgs must recv from each node - for (int i = 0; i < local_R_par_comm->send_data->size_msgs; i++) - { - proc = orig_procs[i]; - node = topology->get_node(proc); - node_sizes[node]++; - } - - // Form recv procs and indptr, based on 
node_sizes - recv_s = 0; - for (int i = 0; i < topology->num_nodes; i++) - { - if (node_sizes[i]) - { - recv_s += node_sizes[i]; - node_to_idx[i] = global_recv->procs.size(); - global_recv->indptr.emplace_back(recv_s); - global_recv->procs.emplace_back(i); // currently have node - node_sizes[i] = 0; - } - } - global_recv->num_msgs = global_recv->procs.size(); - global_recv->size_msgs = recv_s; - - // Form recv indices, placing global column in correct position - global_recv->indices.resize(recv_s); - for (int i = 0; i < local_R_par_comm->send_data->size_msgs; i++) - { - proc = orig_procs[i]; - node = topology->get_node(proc); - node_idx = node_to_idx[node]; - idx = global_recv->indptr[node_idx] + node_sizes[node]++; - global_recv->indices[idx] = local_R_par_comm->send_data->indices[i]; - node_recv_idx_orig_procs[idx] = proc; - } - - // Remove duplicates... Likely send same data to multiple local procs, but - // only want to recv this data from a distant node once - ctr = 0; - start = global_recv->indptr[0]; - for (int i = 0; i < global_recv->num_msgs; i++) - { - proc = global_recv->procs[i]; - end = global_recv->indptr[i+1]; - size = end - start; - if (size) - { - // Find permutation of node_recv_indices (between start and end) - // in ascending order - std::vector p(size); - std::iota(p.begin(), p.end(), 0); - std::sort(p.begin(), p.end(), - [&] (int j, int k) - { - return global_recv->indices[j+start] - < global_recv->indices[k+start]; - }); - - // Sort node_recv_indices and node_recv_idx_orig_procs together - std::vector done(size); - for (int j = 0; j < size; j++) - { - if (done[j]) continue; - - done[j] = true; - int prev_k = j; - int k = p[j]; - while (j != k) - { - std::swap(global_recv->indices[prev_k+start], - global_recv->indices[k+start]); - std::swap(node_recv_idx_orig_procs[prev_k+start], - node_recv_idx_orig_procs[k+start]); - done[k] = true; - prev_k = k; - k = p[k]; - } - } - } - - // Add msg to global_par_comm->recv_data - 
node_recv_idx_orig_procs[ctr] = node_recv_idx_orig_procs[start]; - global_recv->indices[ctr++] - = global_recv->indices[start]; - for (int j = start+1; j < end; j++) - { - if (global_recv->indices[j] != global_recv->indices[j-1]) - { - node_recv_idx_orig_procs[ctr] = node_recv_idx_orig_procs[j]; - global_recv->indices[ctr++] = global_recv->indices[j]; - } - } - global_recv->indptr[i + 1] = ctr; - start = end; - } - global_recv->indices.resize(ctr); - global_recv->size_msgs = ctr; - global_recv->finalize(); - - std::vector send_p(num_procs, 0); - for (int i = 0; i < global_recv->num_msgs; i++) - { - node = global_recv->procs[i]; - proc = topology->get_global_proc(node, local_rank); - send_p[proc] = 1; - } - RAPtor_MPI_Allreduce(RAPtor_MPI_IN_PLACE, send_p.data(), num_procs, RAPtor_MPI_INT, - RAPtor_MPI_SUM, RAPtor_MPI_COMM_WORLD); - int recv_n = send_p[rank]; - sendbuf.resize(recv_n); - sendbuf_sizes.resize(recv_n); - // Send recv sizes to corresponding local procs on appropriate nodes - ctr = 0; - for (int i = 0; i < global_recv->num_msgs; i++) - { - node = global_recv->procs[i]; - proc = topology->get_global_proc(node, local_rank); - RAPtor_MPI_Isend(&(node_sizes[node]), 1, RAPtor_MPI_INT, proc, 9876, RAPtor_MPI_COMM_WORLD, - &(global_recv->requests[i])); - } - for (int i = 0; i < recv_n; i++) - { - RAPtor_MPI_Probe(RAPtor_MPI_ANY_SOURCE, 9876, RAPtor_MPI_COMM_WORLD, &recv_status); - proc = recv_status.RAPtor_MPI_SOURCE; - RAPtor_MPI_Recv(&recvbuf, 1, RAPtor_MPI_INT, proc, 9876, RAPtor_MPI_COMM_WORLD, - &recv_status); - sendbuf[i] = proc; - sendbuf_sizes[i] = recvbuf; - } - RAPtor_MPI_Waitall(global_recv->num_msgs, global_recv->requests.data(), - RAPtor_MPI_STATUSES_IGNORE); - - // Gather all procs to which node must send - n_sends = sendbuf.size(); - RAPtor_MPI_Allgather(&n_sends, 1, RAPtor_MPI_INT, send_sizes.data(), 1, RAPtor_MPI_INT, topology->local_comm); - send_displs[0] = 0; - for (int i = 0; i < topology->PPN; i++) - { - send_displs[i+1] = send_displs[i] + 
send_sizes[i]; - } - n_send_procs = send_displs[topology->PPN]; - send_procs.resize(n_send_procs); - send_proc_sizes.resize(n_send_procs); - RAPtor_MPI_Allgatherv(sendbuf.data(), n_sends, RAPtor_MPI_INT, send_procs.data(), - send_sizes.data(), send_displs.data(), RAPtor_MPI_INT, topology->local_comm); - RAPtor_MPI_Allgatherv(sendbuf_sizes.data(), n_sends, RAPtor_MPI_INT, send_proc_sizes.data(), - send_sizes.data(), send_displs.data(), RAPtor_MPI_INT, topology->local_comm); - - // Permute send_procs based on send_proc_sizes - std::vector p(n_send_procs); - std::iota(p.begin(), p.end(), 0); - std::sort(p.begin(), p.end(), - [&](const int lhs, const int rhs) - { - return send_proc_sizes[lhs] > send_proc_sizes[rhs]; - }); - std::vector done(n_send_procs); - for (int i = 0; i < n_send_procs; i++) - { - if (done[i]) continue; - - done[i] = true; - int prev_j = i; - int j = p[i]; - while (i != j) - { - std::swap(send_procs[prev_j], send_procs[j]); - std::swap(send_proc_sizes[prev_j], send_proc_sizes[j]); - done[j] = true; - prev_j = j; - j = p[j]; - } - } - - // Distribute send_procs across local procs - n_sends = 0; - for (size_t i = topology->PPN - local_rank - 1; i < send_procs.size(); i += topology->PPN) - { - global_par_comm->send_data->procs.emplace_back(send_procs[i]); - } - global_par_comm->send_data->num_msgs = global_par_comm->send_data->procs.size(); - global_par_comm->send_data->requests.resize(global_par_comm->send_data->num_msgs); - - - for (int i = 0; i < global_par_comm->send_data->num_msgs; i++) - { - proc = global_par_comm->send_data->procs[i]; - RAPtor_MPI_Isend(&(global_par_comm->send_data->procs[i]), 1, RAPtor_MPI_INT, proc, 6789, - RAPtor_MPI_COMM_WORLD, &(global_par_comm->send_data->requests[i])); - } - // Recv processes from which rank must recv - for (int i = 0; i < global_recv->num_msgs; i++) - { - RAPtor_MPI_Probe(RAPtor_MPI_ANY_SOURCE, 6789, RAPtor_MPI_COMM_WORLD, &recv_status); - proc = recv_status.RAPtor_MPI_SOURCE; - node = 
topology->get_node(proc); - RAPtor_MPI_Recv(&recvbuf, 1, RAPtor_MPI_INT, proc, 6789, RAPtor_MPI_COMM_WORLD, &recv_status); - idx = node_to_idx[node]; - global_recv->procs[idx] = proc; - } - // Wait for sends to complete - if (global_par_comm->send_data->num_msgs) - { - RAPtor_MPI_Waitall(global_par_comm->send_data->num_msgs, - global_par_comm->send_data->requests.data(), RAPtor_MPI_STATUSES_IGNORE); - - } - - - for (int i = 0; i < global_recv->size_msgs; i++) - { - send_buffer.emplace_back(global_recv->indices[i]); - send_buffer.emplace_back(node_recv_idx_orig_procs[i]); - } - - - // Send recv indices to each recv proc along with the process of - // origin for each recv idx - ctr = 0; - - for (int i = 0; i < global_recv->num_msgs; i++) - { - proc = global_recv->procs[i]; - start = global_recv->indptr[i]; - end = global_recv->indptr[i+1]; - RAPtor_MPI_Isend(&(send_buffer[2*start]), 2*(end - start), - RAPtor_MPI_INT, proc, 5432, RAPtor_MPI_COMM_WORLD, - &(global_recv->requests[i])); - - } - - // Recv send data (which indices to send) to global processes - orig_procs.clear(); - for (int i = 0; i < global_par_comm->send_data->num_msgs; i++) - { - proc = global_par_comm->send_data->procs[i]; - RAPtor_MPI_Probe(proc, 5432, RAPtor_MPI_COMM_WORLD, &recv_status); - RAPtor_MPI_Get_count(&recv_status, RAPtor_MPI_INT, &count); - int commbuf[count]; - RAPtor_MPI_Recv(commbuf, count, RAPtor_MPI_INT, proc, 5432, RAPtor_MPI_COMM_WORLD, &recv_status); - for (int j = 0; j < count; j += 2) - { - global_par_comm->send_data->indices.emplace_back(commbuf[j]); - orig_procs.emplace_back(topology->get_local_proc(commbuf[j+1])); - } - global_par_comm->send_data->indptr.emplace_back( - global_par_comm->send_data->indices.size()); - } - global_par_comm->send_data->num_msgs = global_par_comm->send_data->procs.size(); - global_par_comm->send_data->size_msgs = global_par_comm->send_data->indices.size(); - global_par_comm->send_data->finalize(); - - if (global_recv->num_msgs) - { - 
RAPtor_MPI_Waitall(global_recv->num_msgs, - global_recv->requests.data(), - RAPtor_MPI_STATUS_IGNORE); - } -} - - -/************************************************************** -***** Form local_S_par_comm -************************************************************** -***** Find which local processes the values originating on rank -***** must be sent to, and which processes store values rank must -***** send as inter-node communication. -***** -***** Parameters -***** ------------- -**************************************************************/ -void TAPComm::form_local_S_par_comm(std::vector& orig_procs) -{ - int rank; - int local_rank; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - RAPtor_MPI_Comm_rank(topology->local_comm, &local_rank); - - // Find local_col_starts for all procs local to node, and sort - int start, end; - int proc, proc_idx; - - int ctr, idx; - int size; - - std::vector local_procs(topology->PPN); - std::vector proc_sizes(topology->PPN, 0); - std::vector recv_procs(topology->PPN, 0); - std::vector proc_to_idx(topology->PPN); - - NonContigData* local_S_recv = (NonContigData*) local_S_par_comm->recv_data; - - if (global_par_comm->send_data->num_msgs) - { - local_S_recv->indices.resize(global_par_comm->send_data->size_msgs); - } - - // Find all column indices originating on local procs - for (int i = 0; i < global_par_comm->send_data->size_msgs; i++) - { - proc = orig_procs[i]; - proc_sizes[proc]++; - recv_procs[proc] = 1; - } - - // Reduce recv_procs to how many msgs rank will recv - RAPtor_MPI_Allreduce(RAPtor_MPI_IN_PLACE, recv_procs.data(), topology->PPN, RAPtor_MPI_INT, RAPtor_MPI_SUM, topology->local_comm); - int n_recvs = recv_procs[local_rank]; - - // Form local_S_par_comm recv_data - int recv_s = 0; - for (int i = 0; i < topology->PPN; i++) - { - if (proc_sizes[i]) - { - recv_s += proc_sizes[i]; - proc_to_idx[i] = local_S_recv->procs.size(); - local_S_recv->procs.emplace_back(i); - local_S_recv->indptr.emplace_back(recv_s); - 
} - proc_sizes[i] = 0; - } - local_S_recv->num_msgs = local_S_recv->procs.size(); - for (int i = 0; i < global_par_comm->send_data->size_msgs; i++) - { - proc = orig_procs[i]; - proc_idx = proc_to_idx[proc]; - idx = local_S_recv->indptr[proc_idx] + proc_sizes[proc]++; - local_S_recv->indices[idx] = global_par_comm->send_data->indices[i]; - } - - // Remove duplicate entries from local_S_par_comm recv_data (proc may have - // to send the same data to multiple nodes, but should only recv values a - // single time from each local proc) - ctr = 0; - start = local_S_recv->indptr[0]; - for (int i = 0; i < local_S_recv->num_msgs; i++) - { - end = local_S_recv->indptr[i+1]; - size = end - start; - if (size) - { - std::sort(local_S_recv->indices.begin() + start, - local_S_recv->indices.begin() + end); - local_S_recv->indices[ctr++] = - local_S_recv->indices[start]; - for (int j = start+1; j < end; j++) - { - if (local_S_recv->indices[j] - != local_S_recv->indices[j-1]) - { - local_S_recv->indices[ctr++] - = local_S_recv->indices[j]; - } - } - } - local_S_recv->indptr[i+1] = ctr; - start = end; - } - local_S_recv->indices.resize(ctr); - local_S_recv->size_msgs = ctr; - local_S_recv->finalize(); - - // Send messages to local procs, informing of what data to send - for (int i = 0; i < local_S_recv->num_msgs; i++) - { - proc = local_S_recv->procs[i]; - start = local_S_recv->indptr[i]; - end = local_S_recv->indptr[i+1]; - RAPtor_MPI_Isend(&(local_S_recv->indices[start]), - end - start, RAPtor_MPI_INT, proc, 4321, topology->local_comm, - &(local_S_recv->requests[i])); - } - // Recv messages and form local_S_par_comm send_data - int count; - RAPtor_MPI_Status recv_status; - for (int i = 0; i < n_recvs; i++) - { - RAPtor_MPI_Probe(RAPtor_MPI_ANY_SOURCE, 4321, topology->local_comm, &recv_status); - RAPtor_MPI_Get_count(&recv_status, RAPtor_MPI_INT, &count); - proc = recv_status.RAPtor_MPI_SOURCE; - int recvbuf[count]; - RAPtor_MPI_Recv(recvbuf, count, RAPtor_MPI_INT, proc, 4321, 
topology->local_comm, &recv_status); - for (int j = 0; j < count; j++) - { - local_S_par_comm->send_data->indices.emplace_back(recvbuf[j]); - } - local_S_par_comm->send_data->indptr.emplace_back( - local_S_par_comm->send_data->indices.size()); - local_S_par_comm->send_data->procs.emplace_back(proc); - } - local_S_par_comm->send_data->num_msgs = local_S_par_comm->send_data->procs.size(); - local_S_par_comm->send_data->size_msgs = local_S_par_comm->send_data->indices.size(); - local_S_par_comm->send_data->finalize(); - if (local_S_recv->num_msgs) - { - RAPtor_MPI_Waitall(local_S_recv->num_msgs, - local_S_recv->requests.data(), - RAPtor_MPI_STATUS_IGNORE); - } -} - - -void TAPComm::adjust_send_indices(const int first_local_col) -{ - int idx, idx_pos, size; - int local_S_idx, global_comm_idx; - - if (local_S_par_comm) - { - DuplicateData* local_S_recv = (DuplicateData*) local_S_par_comm->recv_data; - // Update global row index with local row to send - for (int i = 0; i < local_S_par_comm->send_data->size_msgs; i++) - { - local_S_par_comm->send_data->indices[i] -= first_local_col; - } - - // Update global_par_comm->send_data->indices (global rows) to - std::map S_global_to_local; - for (int i = 0; i < local_S_recv->size_msgs; i++) - { - S_global_to_local[local_S_recv->indices[i]] = i; - } - std::vector local_S_num_pos; - if (local_S_recv->size_msgs) - local_S_num_pos.resize(local_S_recv->size_msgs, 0); - for (int i = 0; i < global_par_comm->send_data->size_msgs; i++) - { - idx = global_par_comm->send_data->indices[i]; - local_S_idx = S_global_to_local[idx]; - global_par_comm->send_data->indices[i] = local_S_idx; - local_S_num_pos[local_S_idx]++; - } - local_S_recv->indptr_T.resize(local_S_recv->size_msgs + 1); - local_S_recv->indptr_T[0] = 0; - size = 0; - for (int i = 0; i < local_S_recv->size_msgs; i++) - { - size += local_S_num_pos[i]; - local_S_recv->indptr_T[i+1] = size; - local_S_num_pos[i] = 0; - } - local_S_recv->indices.resize(size); - for(int i = 0; i < 
global_par_comm->send_data->size_msgs; i++) - { - idx = global_par_comm->send_data->indices[i]; - idx_pos = local_S_recv->indptr_T[idx] + local_S_num_pos[idx]++; - local_S_recv->indices[idx_pos] = i; - } - } - else - { - for (int i = 0; i < global_par_comm->send_data->size_msgs; i++) - { - global_par_comm->send_data->indices[i] -= first_local_col; - } - } - - // Update local_R_par_comm->send_data->indices (global_rows) - DuplicateData* global_recv = (DuplicateData*) global_par_comm->recv_data; - std::map global_to_local; - for (int i = 0; i < global_recv->size_msgs; i++) - { - global_to_local[global_recv->indices[i]] = i; - } - std::vector global_num_pos; - if (global_recv->size_msgs) - global_num_pos.resize(global_recv->size_msgs, 0); - for (int i = 0; i < local_R_par_comm->send_data->size_msgs; i++) - { - idx = local_R_par_comm->send_data->indices[i]; - global_comm_idx = global_to_local[idx]; - local_R_par_comm->send_data->indices[i] = global_comm_idx; - global_num_pos[global_comm_idx]++; - } - global_recv->indptr_T.resize(global_recv->size_msgs + 1); - global_recv->indptr_T[0] = 0; - size = 0; - for (int i = 0; i < global_recv->size_msgs; i++) - { - size += global_num_pos[i]; - global_recv->indptr_T[i+1] = size; - global_num_pos[i] = 0; - } - global_recv->indices.resize(size); - for (int i = 0; i < local_R_par_comm->send_data->size_msgs; i++) - { - idx = local_R_par_comm->send_data->indices[i]; - idx_pos = global_recv->indptr_T[idx] + global_num_pos[idx]++; - global_recv->indices[idx_pos] = i; - } - -} - -/************************************************************** -***** Form local_L_par_comm -************************************************************** -***** Adjust send indices from global row index to index of -***** global column in previous recv buffer. 
-***** -***** Parameters -***** ------------- -***** on_node_column_map : std::vector& -***** Columns corresponding to on_node processes -***** on_node_col_to_proc : std::vector& -***** On node process corresponding to each column -***** in on_node_column_map -***** first_local_row : int -***** First row local to rank -**************************************************************/ -void TAPComm::form_local_L_par_comm(const std::vector& on_node_column_map, - const std::vector& on_node_col_to_proc, const int first_local_col) -{ - int local_rank; - RAPtor_MPI_Comm_rank(topology->local_comm, &local_rank); - - int on_node_num_cols = on_node_column_map.size(); - int prev_proc, prev_idx; - int num_sends; - int proc, start, end; - int count; - RAPtor_MPI_Status recv_status; - std::vector recv_procs(topology->PPN, 0); - - NonContigData* local_L_recv = (NonContigData*) local_L_par_comm->recv_data; - - if (on_node_num_cols) - { - prev_proc = on_node_col_to_proc[0]; - recv_procs[prev_proc] = 1; - prev_idx = 0; - for (int i = 1; i < on_node_num_cols; i++) - { - proc = on_node_col_to_proc[i]; - if (proc != prev_proc) - { - local_L_recv->add_msg(prev_proc, i - prev_idx); - prev_proc = proc; - prev_idx = i; - recv_procs[proc] = 1; - } - } - local_L_recv->add_msg(prev_proc, on_node_num_cols - prev_idx); - local_L_recv->finalize(); - - for (int i = 0; i < on_node_num_cols; i++) - { - local_L_recv->indices.emplace_back(i); - } - } - - RAPtor_MPI_Allreduce(RAPtor_MPI_IN_PLACE, recv_procs.data(), topology->PPN, RAPtor_MPI_INT, RAPtor_MPI_SUM, - topology->local_comm); - num_sends = recv_procs[local_rank]; - - for (int i = 0; i < local_L_recv->num_msgs; i++) - { - proc = local_L_recv->procs[i]; - start = local_L_recv->indptr[i]; - end = local_L_recv->indptr[i+1]; - RAPtor_MPI_Isend(&(on_node_column_map[start]), end - start, RAPtor_MPI_INT, proc, - 7890, topology->local_comm, &(local_L_recv->requests[i])); - } - for (int i = 0; i < num_sends; i++) - { - 
RAPtor_MPI_Probe(RAPtor_MPI_ANY_SOURCE, 7890, topology->local_comm, &recv_status); - RAPtor_MPI_Get_count(&recv_status, RAPtor_MPI_INT, &count); - proc = recv_status.RAPtor_MPI_SOURCE; - int recvbuf[count]; - RAPtor_MPI_Recv(recvbuf, count, RAPtor_MPI_INT, proc, 7890, topology->local_comm, &recv_status); - for (int j = 0; j < count; j++) - { - recvbuf[j] -= first_local_col; - } - local_L_par_comm->send_data->add_msg(proc, count, recvbuf); - } - local_L_par_comm->send_data->finalize(); - - if (local_L_recv->num_msgs) - { - RAPtor_MPI_Waitall(local_L_recv->num_msgs, - local_L_recv->requests.data(), - RAPtor_MPI_STATUSES_IGNORE); - } -} - -void TAPComm::form_simple_R_par_comm(std::vector& off_node_column_map, - std::vector& off_node_col_to_proc) -{ - int rank, local_rank; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - RAPtor_MPI_Comm_rank(topology->local_comm, &local_rank); - - int proc, local_proc; - int proc_idx, idx; - int off_node_num_cols = off_node_column_map.size(); - std::vector local_proc_sizes(topology->PPN, 0); - std::vector proc_size_idx(topology->PPN); - - NonContigData* local_R_recv = (NonContigData*) local_R_par_comm->recv_data; - - // Form local_R_par_comm recv_data (currently with global recv indices) - for (std::vector::iterator it = off_node_col_to_proc.begin(); - it != off_node_col_to_proc.end(); ++it) - { - local_proc = topology->get_local_proc(*it); - local_proc_sizes[local_proc]++; - } - - local_R_recv->size_msgs = 0; - local_R_recv->indptr[0] = local_R_recv->size_msgs; - for (int i = 0; i < topology->PPN; i++) - { - if (local_proc_sizes[i]) - { - local_R_recv->num_msgs++; - local_R_recv->size_msgs += local_proc_sizes[i]; - local_proc_sizes[i] = 0; - - proc_size_idx[i] = local_R_recv->procs.size(); - local_R_recv->procs.emplace_back(i); - local_R_recv->indptr.emplace_back( - local_R_recv->size_msgs); - } - } - if (local_R_recv->size_msgs) - { - local_R_recv->indices.resize(local_R_recv->size_msgs); - } - - for (int i = 0; i < 
off_node_num_cols; i++) - { - proc = off_node_col_to_proc[i]; - local_proc = topology->get_local_proc(proc); - proc_idx = proc_size_idx[local_proc]; - idx = local_R_recv->indptr[proc_idx] + local_proc_sizes[local_proc]++; - local_R_recv->indices[idx] = i; - } - local_R_recv->finalize(); - - // Communicate local_R recv_data so send_data can be formed - RAPtor_MPI_Allreduce(RAPtor_MPI_IN_PLACE, local_proc_sizes.data(), topology->PPN, RAPtor_MPI_INT, - RAPtor_MPI_SUM, topology->local_comm); - - local_R_par_comm->recv_data->send(off_node_column_map.data(), 6543, topology->local_comm); - local_R_par_comm->send_data->probe(local_proc_sizes[local_rank], 6543, topology->local_comm); - local_R_par_comm->recv_data->waitall(); -} - -void TAPComm::form_simple_global_comm(std::vector& off_proc_col_to_proc) -{ - int rank; - int num_procs; - int proc, start, end; - int idx, proc_idx; - int global_idx; - - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - RAPtor_MPI_Comm_size(RAPtor_MPI_COMM_WORLD, &num_procs); - - std::vector proc_sizes(num_procs, 0); - std::vector proc_ctr; - - NonContigData* global_recv = (NonContigData*) global_par_comm->recv_data; - - // Communicate processes on which each index originates - local_R_par_comm->communicate_T(off_proc_col_to_proc.data()); - std::vector& int_send_buffer = local_R_par_comm->send_data->get_buffer(); - - for (int i = 0; i < local_R_par_comm->send_data->size_msgs; i++) - { - proc = int_send_buffer[i]; - if (proc_sizes[proc] == 0) - { - global_recv->procs.emplace_back(proc); - } - proc_sizes[proc]++; - } - - global_recv->num_msgs = global_recv->procs.size(); - global_recv->indptr[0] = 0; - global_recv->size_msgs = 0; - for (int i = 0; i < global_recv->num_msgs; i++) - { - proc = global_recv->procs[i]; - global_recv->size_msgs += proc_sizes[proc]; - proc_sizes[proc] = i; // Will now use this for proc_idx - global_recv->indptr.emplace_back(global_recv->size_msgs); - } - if (global_recv->size_msgs) - { - 
global_recv->indices.resize(global_recv->size_msgs); - proc_ctr.resize(global_recv->num_msgs, 0); - } - - for (int i = 0; i < local_R_par_comm->send_data->size_msgs; i++) - { - global_idx = local_R_par_comm->send_data->indices[i]; - proc = int_send_buffer[i]; - proc_idx = proc_sizes[proc]; - idx = global_recv->indptr[proc_idx] + proc_ctr[proc_idx]++; - global_recv->indices[idx] = global_idx; - } - global_recv->finalize(); - - // Communicate global recv_data so send_data can be formed (dynamic comm) - std::vector recv_sizes(num_procs, 0); - for (int i = 0; i < global_recv->num_msgs; i++) - recv_sizes[global_recv->procs[i]] = global_recv->indptr[i+1] - global_recv->indptr[i]; - RAPtor_MPI_Allreduce(RAPtor_MPI_IN_PLACE, recv_sizes.data(), num_procs, RAPtor_MPI_INT, RAPtor_MPI_SUM, RAPtor_MPI_COMM_WORLD); - - for (int i = 0; i < global_recv->num_msgs; i++) - { - proc = global_recv->procs[i]; - start = global_recv->indptr[i]; - end = global_recv->indptr[i+1]; - RAPtor_MPI_Isend(&(global_recv->indices[start]), end - start, RAPtor_MPI_INT, - proc, 6789, RAPtor_MPI_COMM_WORLD, &(global_recv->requests[i])); - } - global_par_comm->send_data->probe(recv_sizes[rank], 6789, RAPtor_MPI_COMM_WORLD); - global_par_comm->recv_data->waitall(); -} - -void TAPComm::update_recv(const std::vector& on_node_to_off_proc, - const std::vector& off_node_to_off_proc, bool update_L) -{ - int idx; - - // Determine size of final recvs (should be equal to - // number of off_proc cols) - recv_size = local_R_par_comm->recv_data->size_msgs + - local_L_par_comm->recv_data->size_msgs; - NonContigData* local_R_recv = (NonContigData*) local_R_par_comm->recv_data; - NonContigData* local_L_recv = (NonContigData*) local_L_par_comm->recv_data; - if (recv_size) - { - // Want a single recv buffer local_R and local_L par_comms - buffer.resize(recv_size); - int_buffer.resize(recv_size); - - // Map local_R recvs to original off_proc_column_map - if (local_R_recv->size_msgs) - { - for (int i = 0; i < 
local_R_recv->size_msgs; i++) - { - idx = local_R_recv->indices[i]; - local_R_recv->indices[i] = off_node_to_off_proc[idx]; - } - } - - - // Map local_L recvs to original off_proc_column_map - if (update_L && local_L_recv->size_msgs) - { - for (int i = 0; i < local_L_recv->size_msgs; i++) - { - idx = local_L_recv->indices[i]; - local_L_recv->indices[i] = on_node_to_off_proc[idx]; - } - } - } -} - - - - diff --git a/raptor/core/tests/CMakeLists.txt b/raptor/core/tests/CMakeLists.txt deleted file mode 100644 index 0d879f81..00000000 --- a/raptor/core/tests/CMakeLists.txt +++ /dev/null @@ -1,56 +0,0 @@ -if (WITH_MPI) - add_executable(test_par_comm test_par_comm.cpp) - target_link_libraries(test_par_comm raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(ParCommTest ${MPIRUN} -n 1 ${HOST} ./test_par_comm) - add_test(ParCommTest ${MPIRUN} -n 4 ${HOST} ./test_par_comm) - add_test(ParCommTest ${MPIRUN} -n 16 ${HOST} ./test_par_comm) - - add_executable(test_tap_comm test_tap_comm.cpp) - target_link_libraries(test_tap_comm raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(TAPCommTest ${MPIRUN} -n 1 ${HOST} ./test_tap_comm) - add_test(TAPCommTest ${MPIRUN} -n 4 ${HOST} ./test_tap_comm) - add_test(TAPCommTest ${MPIRUN} -n 16 ${HOST} ./test_tap_comm) - - add_executable(test_par_matrix test_par_matrix.cpp) - target_link_libraries(test_par_matrix raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(ParMatrixTest ${MPIRUN} -n 1 ${HOST} ./test_par_matrix) - add_test(ParMatrixTest ${MPIRUN} -n 4 ${HOST} ./test_par_matrix) - add_test(ParMatrixTest ${MPIRUN} -n 16 ${HOST} ./test_par_matrix) - - add_executable(test_par_vector test_par_vector.cpp) - target_link_libraries(test_par_vector raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(ParVectorTest ${MPIRUN} -n 1 ${HOST} ./test_par_vector) - add_test(ParVectorTest ${MPIRUN} -n 4 ${HOST} ./test_par_vector) - add_test(ParVectorTest ${MPIRUN} -n 16 ${HOST} ./test_par_vector) - - add_executable(test_par_transpose 
test_par_transpose.cpp) - target_link_libraries(test_par_transpose raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(ParTransposeTest ${MPIRUN} -n 1 ${HOST} ./test_par_transpose) - add_test(ParTransposeTest ${MPIRUN} -n 4 ${HOST} ./test_par_transpose) - add_test(ParTransposeTest ${MPIRUN} -n 16 ${HOST} ./test_par_transpose) - - add_executable(test_par_block_matrix test_par_block_matrix.cpp) - target_link_libraries(test_par_block_matrix raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(ParBlockMatrixTest ${MPIRUN} -n 1 ${HOST} ./test_par_block_matrix) - add_test(ParBlockMatrixTest ${MPIRUN} -n 4 ${HOST} ./test_par_block_matrix) - add_test(ParBlockMatrixTest ${MPIRUN} -n 16 ${HOST} ./test_par_block_matrix) - - add_executable(test_par_block_conversion test_par_block_conversion.cpp) - target_link_libraries(test_par_block_conversion raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(ParBlockConversionTest ${MPIRUN} -n 1 ${HOST} ./test_par_block_conversion) - add_test(ParBlockConversionTest ${MPIRUN} -n 4 ${HOST} ./test_par_block_conversion) - add_test(ParBlockConversionTest ${MPIRUN} -n 16 ${HOST} ./test_par_block_conversion) - -endif () - -add_executable(test_matrix test_matrix.cpp) -target_link_libraries(test_matrix raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(MatrixTest ./test_matrix) - -add_executable(test_transpose test_transpose.cpp) -target_link_libraries(test_transpose raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(TransposeTest ./test_transpose) - -add_executable(test_bsr_matrix test_bsr_matrix.cpp) -target_link_libraries(test_bsr_matrix raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(BSRMatrixTest ./test_bsr_matrix) diff --git a/raptor/core/tests/test_block_matrix.cpp b/raptor/core/tests/test_block_matrix.cpp deleted file mode 100644 index c37408b0..00000000 --- a/raptor/core/tests/test_block_matrix.cpp +++ /dev/null @@ -1,212 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, 
http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor.hpp" -using namespace raptor; - -void compare_vals(CSRMatrix* A, BSRMatrix* B) -{ - A->sort(); - B->sort(); - int ctr = 0; - for (int i = 0; i < B->n_rows; i++) - { - for (int k = 0; k < B->b_rows; k++) - { - for (int j = B->idx1[i]; j < B->idx1[i+1]; j++) - { - double* val = B->block_vals[j]; - for (int l = 0; l < B->b_cols; l++) - { - if (fabs(val[k*B->b_cols + l]) > zero_tol) - { - ASSERT_NEAR(val[k*B->b_cols + l], A->vals[ctr++], 1e-10); - } - - } - } - } - } - -} - -int main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); - -} // end of main() // - -TEST(BlockMatrixTest, TestsInCore) -{ - int block_row_size = 2; - int block_col_size = 2; - int block_size = 4; - int block_nnz = 5; - int block_num_rows = 3; - int block_num_cols = 3; - int num_rows = block_num_rows * block_row_size; - int num_cols = block_num_cols * block_col_size; - int nnz = block_nnz * block_size; - - std::vector rows = {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 5, 5}; - std::vector cols = {0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 4, 5, 4, 5}; - std::vector vals = {1.0, 0.0, 2.0, 1.0, 6.0, 7.0, 8.0, 2.0, 1.0, 4.0, 5.0, 1.0, - 4.0, 3.0, 0.0, 0.0, 7.0, 2.0, 0.0, 0.0}; - - std::vector block_row_ptr = {0,2,3,5}; - std::vector block_rows = {0, 0, 1, 2, 2}; - std::vector block_cols = {0, 1, 1, 1, 2}; - std::vector block_vals; - for (int i = 0; i < block_nnz; i++) - { - double* block = new double[block_size]; - for (int j = 0; j < block_size; j++) - { - block[j] = vals[i*block_size+j]; - } - block_vals.push_back(block); - } - - Matrix* A_bcoo = new BCOOMatrix(block_num_rows, block_num_cols, - block_row_size, block_col_size); - for (int i = 0; i < block_nnz; i++) - A_bcoo->add_value(block_rows[i], block_cols[i], block_vals[i]); - - Matrix* A_coo = new COOMatrix(num_rows, num_cols); - for (int i = 0; i < nnz; i++) - A_coo->add_value(rows[i], cols[i], 
vals[i]); - - Matrix* A_bsr = A_bcoo->to_CSR(); - Matrix* A_csr = A_coo->to_CSR(); - Matrix* A_bsc = A_bsr->to_CSC(); - Matrix* A_csc = A_csr->to_CSC(); - Matrix* A_csr_from_bsr = A_bsr->to_CSR(); - - Vector x(num_rows); - Vector b(num_cols); - Vector tmp(num_cols); - x.set_const_value(1.0); - - A_bcoo->sort(); - A_bcoo->move_diag(); - A_bcoo->remove_duplicates(); - - A_bsr->sort(); - A_bsr->move_diag(); - A_bsr->remove_duplicates(); - - A_bsc->sort(); - A_bsc->move_diag(); - A_bsc->remove_duplicates(); - - ASSERT_EQ(A_bcoo->n_rows, A_bsr->n_rows); - ASSERT_EQ(A_bsr->n_rows, A_bsc->n_rows); - ASSERT_EQ(A_bcoo->n_cols, A_bsr->n_cols); - ASSERT_EQ(A_bsr->n_cols, A_bsc->n_cols); - ASSERT_EQ(A_bcoo->nnz, A_bsr->nnz); - ASSERT_EQ(A_bsr->nnz, A_bsc->nnz); - ASSERT_EQ(A_csr_from_bsr->nnz, A_csr->nnz); - - double** bcoo_vals = (double**) A_bcoo->get_data(); - double** bsr_vals = (double**) A_bsr->get_data(); - for (int i = 0; i < A_bcoo->nnz; i++) - { - for (int j = 0; j < A_bcoo->b_size; j++) - { - ASSERT_NEAR(bcoo_vals[i][j], bsr_vals[i][j], 1e-10); - } - } - - Matrix* Atmp = A_bsc->to_CSR(); - Atmp->sort(); - Atmp->move_diag(); - double** tmp_vals = (double**) Atmp->get_data(); - for (int i = 0; i < A_bsr->nnz; i++) - { - for (int j = 0; j < A_bsr->b_size; j++) - { - ASSERT_NEAR(bsr_vals[i][j], tmp_vals[i][j], 1e-10); - } - } - - ASSERT_EQ(A_bcoo->format(), BCOO); - ASSERT_EQ(A_coo->format(), COO); - ASSERT_EQ(A_bsr->format(), BSR); - ASSERT_EQ(A_csr->format(), CSR); - ASSERT_EQ(A_bsc->format(), BSC); - ASSERT_EQ(A_csc->format(), CSC); - ASSERT_EQ(A_csr_from_bsr->format(), CSR); - - A_csr->mult(x, b); - A_bsr->mult(x, tmp); - for (int i = 0; i < num_cols; i++) - ASSERT_NEAR(b[i], tmp[i], 1e-10); - - A_csr_from_bsr->mult(x, tmp); - for (int i = 0; i < num_cols; i++) - ASSERT_NEAR(b[i], tmp[i], 1e-10); - - A_coo->mult(x, tmp); - for (int i = 0; i < num_cols; i++) - ASSERT_NEAR(b[i], tmp[i], 1e-10); - - A_bcoo->mult(x, tmp); - for (int i = 0; i < num_cols; i++) - 
ASSERT_NEAR(b[i], tmp[i], 1e-10); - - A_csc->mult(x, tmp); - for (int i = 0; i < num_cols; i++) - ASSERT_NEAR(b[i], tmp[i], 1e-10); - - A_bsc->mult(x, tmp); - for (int i = 0; i < num_cols; i++) - ASSERT_NEAR(b[i], tmp[i], 1e-10); - - - CSRMatrix* C_csr = A_csr->mult((CSRMatrix*)A_csr); - CSRMatrix* C_bsr = A_bsr->mult((BSRMatrix*)A_bsr); - ASSERT_EQ(C_csr->n_rows, C_bsr->n_rows * C_bsr->b_rows); - ASSERT_EQ(C_csr->n_cols, C_bsr->n_cols * C_bsr->b_cols); - compare_vals(C_csr, (BSRMatrix*) C_bsr); - - CSRMatrix* C_csr_from_bsr = A_csr_from_bsr->mult((CSRMatrix*)A_csr_from_bsr); - ASSERT_EQ(C_csr_from_bsr->n_rows, C_bsr->n_rows * C_bsr->b_rows); - ASSERT_EQ(C_csr_from_bsr->n_cols, C_bsr->n_cols * C_bsr->b_cols); - compare_vals(C_csr_from_bsr, (BSRMatrix*) C_bsr); - - CSRMatrix* D_csr = A_csr->mult_T((CSCMatrix*)A_csc); - CSRMatrix* D_bsr = A_bsr->mult_T((BSCMatrix*)A_bsc); - ASSERT_EQ(D_csr->n_rows, D_bsr->n_rows * D_bsr->b_rows); - ASSERT_EQ(D_csr->n_cols, D_bsr->n_cols * D_bsr->b_cols); - compare_vals(D_csr, (BSRMatrix*) D_bsr); - - CSRMatrix* D_csr_from_bsr = A_csr_from_bsr->mult_T((CSCMatrix*)A_csc); - ASSERT_EQ(D_csr_from_bsr->n_rows, D_bsr->n_rows * D_bsr->b_rows); - ASSERT_EQ(D_csr_from_bsr->n_cols, D_bsr->n_cols * D_bsr->b_cols); - compare_vals(D_csr_from_bsr, (BSRMatrix*) D_bsr); - - delete A_bsr; - delete A_csr; - delete A_bsc; - delete A_csc; - delete A_bcoo; - delete A_coo; - delete A_csr_from_bsr; - - delete C_csr; - delete C_bsr; - delete C_csr_from_bsr; - - delete D_csr; - delete D_bsr; - delete D_csr_from_bsr; - - for (std::vector::iterator it = block_vals.begin(); - it != block_vals.end(); ++it) - delete[] *it; - -} // end of TEST(MatrixTest, TestsInCore) // - - diff --git a/raptor/core/tests/test_bsr_matrix.cpp b/raptor/core/tests/test_bsr_matrix.cpp deleted file mode 100644 index 850a66da..00000000 --- a/raptor/core/tests/test_bsr_matrix.cpp +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified 
BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor/raptor.hpp" -using namespace raptor; - - -int main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); - -} // end of main() // - -TEST(BSRMatrixTest, TestsInCore) -{ - // Matrix [0, 1], [1, 0] - // [2, 0], [0, 2] - // [3, 0], [0, 0] - // [0, 4], [0, 0] - int n_csr = 4; - int nnz_csr = 6; - std::vector rowptr_csr = {0, 2, 4, 5, 6}; - std::vector col_idx_csr = {1, 2, 0, 3, 0, 1}; - std::vector data_csr = {1, 1, 2, 2, 3, 4}; - CSRMatrix* A_csr = new CSRMatrix(n_csr, n_csr, rowptr_csr, col_idx_csr, data_csr); - - int n = 2; // 2 blocks by 2 blocks - int br = 2; // blocks are each 2x2 - int bs = 4; - int nnz = 3; // 3 blocks - std::vector rowptr = {0, 2, 3}; - std::vector col_idx = {0, 1, 0}; - std::vector data = {0, 1, 2, 0, 1, 0, 0, 2, 3, 0, 0, 4}; - - // Hardcode one BSR Matrix - BSRMatrix* A = new BSRMatrix(n, n, br, br, nnz); - A->idx1[0] = 0; - for (int i = 0; i < n; i++) - { - A->idx1[i+1] = rowptr[i+1]; - for (int j = A->idx1[i]; j < A->idx1[i+1]; j++) - { - A->idx2.push_back(col_idx[j]); - double* vals = new double[bs]; - for (int k = 0; k < bs; k++) - vals[k] = data[j*bs + k]; - A->block_vals.push_back(vals); - } - } - - // Call method that converts CSR to BSR - BSRMatrix* A_conv = new BSRMatrix(A_csr, br, br); - - // Check that both BSR matrices are equivalent - ASSERT_EQ(A_conv->n_rows, A->n_rows); - ASSERT_EQ(A_conv->n_cols, A->n_cols); - ASSERT_EQ(A_conv->b_rows, A->b_rows); - ASSERT_EQ(A_conv->b_cols, A->b_cols); - ASSERT_EQ(A_conv->b_size, A->b_size); - - for (int i = 0; i < A->n_rows; i++) - { - ASSERT_EQ(A_conv->idx1[i+1], A->idx1[i+1]); - for (int j = A->idx1[i]; j < A->idx1[i+1]; j++) - { - ASSERT_EQ(A_conv->idx2[j], A->idx2[j]); - for (int k = 0; k < A->b_size; k++) - ASSERT_EQ(A_conv->block_vals[j][k], A->block_vals[j][k]); - } - } - - delete A_csr; - delete A; - delete A_conv; - -} // end of TEST(MatrixTest, 
TestsInCore) // - diff --git a/raptor/core/tests/test_matrix.cpp b/raptor/core/tests/test_matrix.cpp deleted file mode 100644 index bd4c6078..00000000 --- a/raptor/core/tests/test_matrix.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor/raptor.hpp" -using namespace raptor; - - -int main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); - -} // end of main() // - -TEST(MatrixTest, TestsInCore) -{ - int rows[10] = {22, 17, 12, 0, 5, 7, 1, 0, 0, 12}; - int cols[10] = {5, 18, 21, 0, 7, 7, 0, 1, 0, 21}; - double vals[10] = {2.0, 1.0, 0.5, 1.0, 2.0, 1.0, 1.2, 2.2, 1.5, -1.0}; - - int row_ctr[26] = {0, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 9, - 9, 9, 9, 9, 10, 10, 10}; - - // Create COO Matrix (25x25) - COOMatrix* A_coo = new COOMatrix(25, 25, 1); - for (int i = 0; i < 10; i++) - { - A_coo->add_value(rows[i], cols[i], vals[i]); - } - - // Check dimensions of A_coo - ASSERT_EQ(A_coo->n_rows, 25); - ASSERT_EQ(A_coo->n_cols, 25); - ASSERT_EQ(A_coo->nnz, 10); - - // Check that rows, columns, and values in A_coo are correct - for (int i = 0; i < 10; i++) - { - ASSERT_EQ(A_coo->idx1[i], rows[i]); - ASSERT_EQ(A_coo->idx2[i], cols[i]); - ASSERT_EQ(A_coo->vals[i], vals[i]); - } - - // Create CSR Matrix from COO - CSRMatrix* A_csr = A_coo->to_CSR(); - - // Check dimensions of A_csr - ASSERT_EQ(A_csr->n_rows,25); - ASSERT_EQ(A_csr->n_cols,25); - ASSERT_EQ(A_csr->nnz,10); - - // Check that rows, columns, and values in A_coo are correct - - for (int i = 0; i < 26; i++) - { - ASSERT_EQ(A_csr->idx1[i],row_ctr[i]); - } - - delete A_coo; - delete A_csr; - -} // end of TEST(MatrixTest, TestsInCore) // - diff --git a/raptor/core/tests/test_par_block_conversion.cpp b/raptor/core/tests/test_par_block_conversion.cpp deleted file mode 100644 index 624aec8f..00000000 --- 
a/raptor/core/tests/test_par_block_conversion.cpp +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor/raptor.hpp" - -using namespace raptor; - -int main(int argc, char** argv) -{ - MPI_Init(&argc, &argv); - ::testing::InitGoogleTest(&argc, argv); - int temp=RUN_ALL_TESTS(); - MPI_Finalize(); - return temp; - -} // end of main() // - -TEST(ParBlockConversionTest, TestsInCore) -{ - int rank, num_procs; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &num_procs); - - setenv("PPN", "4", 1); - - // Form standard anisotropic matrix - double eps = 0.001; - double theta = M_PI / 8.0; - int block_n = 2; - std::vector grid(2, num_procs*block_n); - double* stencil = diffusion_stencil_2d(eps, theta); - ParCSRMatrix* A = par_stencil_grid(stencil, grid.data(), 2); - - ParBSRMatrix* A_bsr = A->to_ParBSR(block_n, block_n); - ParCSRMatrix* A_csr_from_bsr = A_bsr->to_ParCSR(); - - ASSERT_EQ(A->local_nnz,A_csr_from_bsr->local_nnz); - - // Test Partition of BSR to CSR - for (int i = 0; i < (int)A_csr_from_bsr->partition->first_cols.size(); i++) - { - ASSERT_EQ(A->partition->first_cols[i], A_csr_from_bsr->partition->first_cols[i]); - } - ASSERT_EQ(A->partition->local_num_rows, A_csr_from_bsr->partition->local_num_rows); - ASSERT_EQ(A->partition->local_num_cols, A_csr_from_bsr->partition->local_num_cols); - ASSERT_EQ(A->partition->first_local_row, A_csr_from_bsr->partition->first_local_row); - ASSERT_EQ(A->partition->first_local_col, A_csr_from_bsr->partition->first_local_col); - ASSERT_EQ(A->partition->last_local_row, A_csr_from_bsr->partition->last_local_row); - ASSERT_EQ(A->partition->last_local_col, A_csr_from_bsr->partition->last_local_col); - - // Test Row and Column Maps of BSR to CSR - for (int i = 0; i < (int)A_csr_from_bsr->off_proc_column_map.size(); i++) - { - ASSERT_EQ(A->off_proc_column_map[i], 
A_csr_from_bsr->off_proc_column_map[i]); - } - for (int i = 0; i < (int)A_csr_from_bsr->on_proc_column_map.size(); i++) - { - ASSERT_EQ(A->on_proc_column_map[i], A_csr_from_bsr->on_proc_column_map[i]); - } - for (int i = 0; i < (int)A_csr_from_bsr->local_row_map.size(); i++) - { - ASSERT_EQ(A->local_row_map[i], A_csr_from_bsr->local_row_map[i]); - } - - ParVector x(A->global_num_rows, A->local_num_rows); - ParVector b(A->global_num_rows, A->local_num_rows); - ParVector tmp(A->global_num_rows, A->local_num_rows); - x.set_const_value(1.0); - - // Test BSR to CSR SpMV - A_bsr->mult(x, b); - A_csr_from_bsr->mult(x, tmp); - for (int i = 0; i < A_csr_from_bsr->local_num_rows; i++) - ASSERT_NEAR(tmp[i], b[i], 1e-10); - - // Test BSR to CSR Transpose SpMV - A_bsr->mult_T(x, b); - A_csr_from_bsr->mult_T(x, tmp); - for (int i = 0; i < A_csr_from_bsr->local_num_rows; i++) - ASSERT_NEAR(tmp[i], b[i], 1e-10); - - // Test BSR to CSR TAPSpMVs - A_bsr->tap_mult(x, b); - A_csr_from_bsr->tap_mult(x, tmp); - for (int i = 0; i < A_csr_from_bsr->local_num_rows; i++) - ASSERT_NEAR(tmp[i], b[i], 1e-10); - - // Test BSR to CSR Transpose TAPSpMV - A_bsr->tap_mult_T(x, b); - A_csr_from_bsr->tap_mult_T(x, tmp); - for (int i = 0; i < A_csr_from_bsr->local_num_rows; i++) - ASSERT_NEAR(tmp[i], b[i], 1e-10); - - delete A; - delete A_bsr; - delete A_csr_from_bsr; - - setenv("PPN", "16", 1); - - -} // end of TEST(ParBlockConversionTest, TestsInCore) // - - - diff --git a/raptor/core/tests/test_par_block_matrix.cpp b/raptor/core/tests/test_par_block_matrix.cpp deleted file mode 100644 index 6668ae79..00000000 --- a/raptor/core/tests/test_par_block_matrix.cpp +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor/raptor.hpp" - -using namespace raptor; - -void compare_vals(CSRMatrix* A, BSRMatrix* B) -{ - A->sort(); - B->sort(); - int ctr = 0; - for (int i 
= 0; i < B->n_rows; i++) - { - for (int k = 0; k < B->b_rows; k++) - { - for (int j = B->idx1[i]; j < B->idx1[i+1]; j++) - { - double* val = B->block_vals[j]; - for (int l = 0; l < B->b_cols; l++) - { - if (fabs(val[(k*B->b_cols) + l]) > zero_tol) - { - ASSERT_NEAR(val[k*B->b_cols + l], A->vals[ctr++], 1e-10); - } - } - } - } - } - -} - -int main(int argc, char** argv) -{ - MPI_Init(&argc, &argv); - ::testing::InitGoogleTest(&argc, argv); - int temp=RUN_ALL_TESTS(); - MPI_Finalize(); - return temp; - -} // end of main() // - -TEST(ParBlockMatrixTest, TestsInCore) -{ - int rank, num_procs; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &num_procs); - - setenv("PPN", "4", 1); - - // Form standard anisotropic matrix - double eps = 0.001; - double theta = M_PI / 8.0; - int block_n = 2; - std::vector grid(2, num_procs*block_n); - double* stencil = diffusion_stencil_2d(eps, theta); - ParCSRMatrix* A = par_stencil_grid(stencil, grid.data(), 2); - ParBSRMatrix* A_bsr = A->to_ParBSR(block_n, block_n); - - ParVector x(A->global_num_rows, A->local_num_rows); - ParVector b(A->global_num_rows, A->local_num_rows); - ParVector tmp(A->global_num_rows, A->local_num_rows); - x.set_const_value(1.0); - - // Test Blocked Communication - std::vector std; - std::vector blocked; - std = A->comm->communicate(x); - blocked = A_bsr->comm->communicate(x, A_bsr->off_proc->b_cols); - ASSERT_EQ(std.size(), blocked.size()); - int n = std.size(); - for (int i = 0; i < n; i++) - ASSERT_NEAR(std[i], blocked[i], 1e-10); - - // Test Blocked SpMV - A->mult(x, b); - A_bsr->mult(x, tmp); - for (int i = 0; i < A->local_num_rows; i++) - ASSERT_NEAR(tmp[i], b[i], 1e-10); - - // Test Blocked Transpose Communication - A->comm->communicate_T(*x.local.storage, *b.local.storage); - A_bsr->comm->communicate_T(*x.local.storage, *tmp.local.storage, A_bsr->off_proc->b_cols); - ASSERT_EQ(std.size(), blocked.size()); - for (int i = 0; i < n; i++) - ASSERT_NEAR(b[i], tmp[i], 1e-10); - - // Test 
Blocked Transpose SpMV - A->mult_T(x, b); - A_bsr->mult_T(x, tmp); - for (int i = 0; i < A->local_num_rows; i++) - ASSERT_NEAR(tmp[i], b[i], 1e-10); - - // Test Blocked TAPSpMVs - A->tap_comm = new TAPComm(A->partition, A->off_proc_column_map); - A_bsr->tap_comm = new TAPComm(A_bsr->partition, A_bsr->off_proc_column_map); - std = A->tap_comm->communicate(x); - blocked = A_bsr->tap_comm->communicate(x, A_bsr->off_proc->b_cols); - ASSERT_EQ(std.size(), blocked.size()); - - A->tap_mult(x, b); - A_bsr->tap_mult(x, tmp); - for (int i = 0; i < A->local_num_rows; i++) - ASSERT_NEAR(b[i], tmp[i], 1e-10); - - // Test Blocked Transpose TAPSpMVs - A->tap_comm->communicate_T(*x.local.storage, *b.local.storage); - A_bsr->comm->communicate_T(*x.local.storage, *tmp.local.storage, A_bsr->off_proc->b_cols); - ASSERT_EQ(std.size(), blocked.size()); - for (int i = 0; i < n; i++) - ASSERT_NEAR(b[i], tmp[i], 1e-10); - - // Test Blocked Transpose TAPSpMV - A->tap_mult_T(x, b); - A_bsr->tap_mult_T(x, tmp); - for (int i = 0; i < A->local_num_rows; i++) - ASSERT_NEAR(tmp[i], b[i], 1e-10); - - // Test Blocked Matrix Communication - CSRMatrix* C = A->comm->communicate(A); - BSRMatrix* C_bsr = (BSRMatrix*) A_bsr->comm->communicate(A_bsr); - C->sort(); - C_bsr->sort(); - ASSERT_EQ(C->n_rows, C_bsr->n_rows * C_bsr->b_rows); - compare_vals(C, C_bsr); - delete C; - delete C_bsr; - - delete A; - delete A_bsr; - - setenv("PPN", "16", 1); - - -} // end of TEST(MatrixTest, TestsInCore) // diff --git a/raptor/core/tests/test_par_bsr.cpp b/raptor/core/tests/test_par_bsr.cpp deleted file mode 100644 index ebf4b3b5..00000000 --- a/raptor/core/tests/test_par_bsr.cpp +++ /dev/null @@ -1,148 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor.hpp" - -using namespace raptor; - -int main(int argc, char** argv) -{ - MPI_Init(&argc, &argv); - ::testing::InitGoogleTest(&argc, argv); - int 
temp=RUN_ALL_TESTS(); - MPI_Finalize(); - return temp; - -} // end of main() // - -TEST(ParBSRMatrixTest, TestsInCore) -{ - /*int rank, num_procs; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &num_procs); - - std::vector row_ptr = {0, 3, 5, 8, 11, 13, 16}; - std::vector indices = {0, 1, 4, 1, 3, 1, 2, 5, 1, 3, 4, 0, 4, 2, 4, 5}; - std::vector data = {1,0,2,1, 6,7,8,2, 1,0,0,1, 1,4,5,1, 2,0,0,0, 4,3,0,0, - 7,2,0,0, 3,0,1,0, 1,0,0,1, 1,0,2,1, 6,7,8,2, 2,0,0,0, - 1,4,5,1, 3,0,1,0, 4,3,0,0, 7,2,0,0}; - - std::vector> on_blocks = {{1,0,2,1}, {6,7,8,2}, {1,4,5,1}, - {4,3,0,0}, {7,2,0,0}}; - std::vector> on_indx = {{0,0}, {0,1}, {1,1}, {2,1}, {2,2}}; - - std::vector> off_blocks = {{1,0,0,1}, {2,0,0,0}, {3,0,1,0}}; - std::vector> off_indx = {{0,4}, {1,3}, {2,5}}; - - // Create matrices for comparison - BSRMatrix* A_bsr = new BSRMatrix(12, 12, 2, 2, row_ptr, indices, data); - COOMatrix* A_coo = A_bsr->to_COO(); - ParBSRMatrix* A_par_bsr = new ParBSRMatrix(12, 12, 2, 2); - - // Add on_proc blocks - for (int i=0; iadd_block(on_indx[i][0], on_indx[i][1], on_blocks[i]); - A_par_bsr->add_block(on_indx[i][0]+3, on_indx[i][1]+3, on_blocks[i]); - } - - // Add off_proc blocks - for(int i=0; iadd_block(off_indx[i][0], off_indx[i][1], off_blocks[i]); - A_par_bsr->add_block(off_indx[i][0]+3, off_indx[i][1]-3, off_blocks[i]); - } - - // Finalize ParBSRMatrix and create on and off process maps - A_par_bsr->finalize(true, 2); - - // Compare nnz - int lcl_nnz = A_par_bsr->local_nnz; - int nnz; - MPI_Allreduce(&lcl_nnz, &nnz, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - ASSERT_EQ(A_bsr->nnz, nnz); - - // Compare n_blocks - int lcl_nblocks = A_par_bsr->on_proc->idx2.size() + A_par_bsr->off_proc->idx2.size(); - int nblocks; - MPI_Allreduce(&lcl_nblocks,& nblocks, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - ASSERT_EQ(A_bsr->n_blocks, nblocks); - - // Create dense matrix to compare against - std::vector A_dense = A_bsr->to_dense(); - - // Compare row_ptrs, indices, and data - 
if (num_procs <= 1) - { - for (int i=0; ion_proc->idx1.size(); i++) - { - ASSERT_EQ(A_bsr->idx1[i], A_par_bsr->on_proc->idx1[i]); - } - for (int i=0; ion_proc->idx2.size(); i++) - { - ASSERT_EQ(A_bsr->idx2[i], A_par_bsr->on_proc->idx2[i]); - } - for (int i=0; ion_proc->vals.size(); i++) - { - ASSERT_EQ(A_bsr->vals[i], A_par_bsr->on_proc->vals[i]); - } - - } - else - { - int block_rows = A_par_bsr->b_rows; - int block_cols = A_par_bsr->b_cols; - int local_rows = A_par_bsr->local_num_rows; - - for (int i = 0; i < local_rows/block_rows; i++) - { - int start = A_par_bsr->on_proc->idx1[i]; - int end = A_par_bsr->on_proc->idx1[i+1]; - for (int j = start; j < end; j++) - { - int upper_i = A_par_bsr->local_row_map[i*block_rows]; - int upper_j = A_par_bsr->on_proc_column_map[(A_par_bsr->on_proc->idx2[j])*block_cols]; - int data_offset = j * block_rows * block_cols; - for (int bi = 0; bi < block_rows; bi++) - { - for (int bj = 0; bj < block_cols; bj++) - { - int glob_i = upper_i + bi; - int glob_j = upper_j + bj; - int ind = bi * block_cols + bj + data_offset; - double val = A_par_bsr->on_proc->vals[ind]; - int glob_ind = glob_i*12+glob_j; - ASSERT_NEAR(A_dense[glob_ind], val, zero_tol); - } - } - } - - start = A_par_bsr->off_proc->idx1[i]; - end = A_par_bsr->off_proc->idx1[i+1]; - for (int j = start; j < end; j++) - { - int upper_i = A_par_bsr->local_row_map[i*block_rows]; - int upper_j = A_par_bsr->off_proc_column_map[(A_par_bsr->off_proc->idx2[j])*block_cols]; - int data_offset = j * block_rows * block_cols; - for (int bi = 0; bi < block_rows; bi++) - { - for (int bj = 0; bj < block_cols; bj++) - { - int glob_i = upper_i + bi; - int glob_j = upper_j + bj; - int ind = bi * block_cols + bj + data_offset; - int glob_ind = glob_i*12+glob_j; - double val = A_par_bsr->off_proc->vals[ind]; - ASSERT_NEAR(A_dense[glob_i*12+glob_j], val, zero_tol); - } - } - } - } - } - - // Delete pointers - delete A_par_bsr; - delete A_bsr; - - */ - -} // end of TEST(ParMatrixTest, TestsInCore) 
// diff --git a/raptor/core/tests/test_par_comm.cpp b/raptor/core/tests/test_par_comm.cpp deleted file mode 100644 index d6d249d9..00000000 --- a/raptor/core/tests/test_par_comm.cpp +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" - -#include "raptor/raptor.hpp" - -using namespace raptor; - -int main(int argc, char** argv) -{ - MPI_Init(&argc, &argv); - ::testing::InitGoogleTest(&argc, argv); - int temp=RUN_ALL_TESTS(); - MPI_Finalize(); - return temp; -} // end of main() // - -TEST(ParCommTest, TestsInCore) -{ - int rank, num_procs; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &num_procs); - - double eps = 0.001; - double theta = M_PI / 8.0; - int grid[2] = {10, 10}; - int global_row, global_col; - int start, end; - double val; - double* stencil = diffusion_stencil_2d(eps, theta); - std::vector sendbuf; - std::vector seq_row; - - CSRMatrix* A_seq = stencil_grid(stencil, grid, 2); - - ParCSRMatrix* A = par_stencil_grid(stencil, grid, 2); - - ParVector x(A->global_num_rows, A->local_num_rows); - if (A->local_num_rows) - { - sendbuf.resize(A->local_num_rows); - for (int i = 0; i < A->local_num_rows; i++) - { - sendbuf[i] = A->local_row_map[i]; - } - } - - A->comm->communicate(sendbuf); - - for (int i = 0; i < A->off_proc_num_cols; i++) - { - ASSERT_EQ(A->comm->recv_data->int_buffer[i], A->off_proc_column_map[i]); - } - - seq_row.resize(A_seq->n_cols); - CSRMatrix* recv_mat = A->comm->communicate(A); - for (int i = 0; i < A->off_proc_num_cols; i++) - { - global_row = A->off_proc_column_map[i]; - start = A_seq->idx1[global_row]; - end = A_seq->idx1[global_row+1]; - for (int j = start; j < end; j++) - { - seq_row[A_seq->idx2[j]] = A_seq->vals[j]; - } - - start = recv_mat->idx1[i]; - end = recv_mat->idx1[i+1]; - for (int j = start; j < end; j++) - { - global_col = recv_mat->idx2[j]; - val = recv_mat->vals[j]; - 
ASSERT_NEAR(seq_row[global_col], val, 1e-06); - } - } - - delete recv_mat; - delete[] stencil; - delete A; - delete A_seq; - -} // end of TEST(ParCommTest, TestsInCore) // diff --git a/raptor/core/tests/test_par_matrix.cpp b/raptor/core/tests/test_par_matrix.cpp deleted file mode 100644 index e56fb1cd..00000000 --- a/raptor/core/tests/test_par_matrix.cpp +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor/raptor.hpp" - -using namespace raptor; - -int main(int argc, char** argv) -{ - MPI_Init(&argc, &argv); - ::testing::InitGoogleTest(&argc, argv); - int temp=RUN_ALL_TESTS(); - MPI_Finalize(); - return temp; - -} // end of main() // - -TEST(ParMatrixTest, TestsInCore) -{ - int rank, num_procs; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &num_procs); - - double eps = 0.001; - double theta = M_PI / 8.0; - int grid[2] = {10, 10}; - double* stencil = diffusion_stencil_2d(eps, theta); - CSRMatrix* A = stencil_grid(stencil, grid, 2); - ParCSRMatrix* A_par = par_stencil_grid(stencil, grid, 2); - - ParCSCMatrix* A_par_csc = A_par->to_ParCSC(); - - int lcl_nnz = A_par->local_nnz; - int nnz; - MPI_Allreduce(&lcl_nnz, &nnz, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); - - ASSERT_EQ(A->nnz,nnz); - - double A_dense[10000] = {0}; - for (int i = 0; i < A->n_rows; i++) - { - for (int j = A->idx1[i]; j < A->idx1[i+1]; j++) - { - A_dense[i*100 + A->idx2[j]] = A->vals[j]; - } - } - - // Compare A_par to A_dense - for (int i = 0; i < A_par->local_num_rows; i++) - { - int row = A_par->local_row_map[i]; - for (int j = A_par->on_proc->idx1[i]; j < A_par->on_proc->idx1[i+1]; j++) - { - int col = A_par->on_proc_column_map[A_par->on_proc->idx2[j]]; - //ASSERT_LT((fabs(A_dense[row*100+col] - A_par->on_proc->vals[j])), zero_tol); - ASSERT_NEAR(A_dense[row*100+col], A_par->on_proc->vals[j], zero_tol); - } - - for (int j = 
A_par->off_proc->idx1[i]; j < A_par->off_proc->idx1[i+1]; j++) - { - int col = A_par->off_proc_column_map[A_par->off_proc->idx2[j]]; - ASSERT_NEAR(A_dense[row*100+col], A_par->off_proc->vals[j], zero_tol); - } - } - - // Compare A_par_csc to A_dense - for (int i = 0; i < A_par_csc->on_proc_num_cols; i++) - { - int col = A_par_csc->on_proc_column_map[i]; - for (int j = A_par_csc->on_proc->idx1[i]; j < A_par_csc->on_proc->idx1[i+1]; j++) - { - int row = A_par_csc->local_row_map[A_par_csc->on_proc->idx2[j]]; - ASSERT_NEAR(A_dense[row*100+col],A_par_csc->on_proc->vals[j], zero_tol); - } - } - - for (int i = 0; i < A_par_csc->off_proc_num_cols; i++) - { - int col = A_par_csc->off_proc_column_map[i]; - for (int j = A_par_csc->off_proc->idx1[i]; j < A_par_csc->off_proc->idx1[i+1]; j++) - { - int row = A_par_csc->local_row_map[A_par_csc->off_proc->idx2[j]]; - ASSERT_NEAR(A_dense[row*100+col], A_par_csc->off_proc->vals[j], zero_tol); - } - } - - delete[] stencil; - -} // end of TEST(ParMatrixTest, TestsInCore) // diff --git a/raptor/core/tests/test_par_transpose.cpp b/raptor/core/tests/test_par_transpose.cpp deleted file mode 100644 index 1c82d513..00000000 --- a/raptor/core/tests/test_par_transpose.cpp +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor/raptor.hpp" -#include "raptor/tests/par_compare.hpp" - -using namespace raptor; - -int main(int argc, char** argv) -{ - MPI_Init(&argc, &argv); - ::testing::InitGoogleTest(&argc, argv); - int temp=RUN_ALL_TESTS(); - MPI_Finalize(); - return temp; - -} // end of main() // - -TEST(ParMatrixTest, TestsInCore) -{ - int rank, num_procs; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &num_procs); - - ParCSRMatrix* A = readParMatrix("../../../../test_data/aniso.pm"); - ParCSRMatrix* AT_py = readParMatrix("../../../../test_data/aniso_T.pm"); - ParCSRMatrix* AT 
= (ParCSRMatrix*) A->transpose(); - A->sort(); - AT->sort(); - AT_py->sort(); - compare(AT, AT_py); - delete A; - delete AT_py; - delete AT; - - A = readParMatrix("../../../../test_data/laplacian.pm"); - AT_py = readParMatrix("../../../../test_data/laplacian_T.pm"); - AT = (ParCSRMatrix*) A->transpose(); - A->sort(); - AT->sort(); - AT_py->sort(); - compare(AT, AT_py); - delete A; - delete AT_py; - delete AT; - - -} // end of TEST(ParMatrixTest, TestsInCore) // - diff --git a/raptor/core/tests/test_par_vector.cpp b/raptor/core/tests/test_par_vector.cpp deleted file mode 100644 index 55843165..00000000 --- a/raptor/core/tests/test_par_vector.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor/raptor.hpp" - -using namespace raptor; - -int main(int argc, char** argv) -{ - MPI_Init(&argc, &argv); - ::testing::InitGoogleTest(&argc, argv); - int temp=RUN_ALL_TESTS(); - MPI_Finalize(); - return temp; -} // end of main() // - -TEST(ParVectorTest, TestsInCore) -{ - int rank, num_procs; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &num_procs); - - int global_n = 100; - int local_n = global_n / num_procs; - int first_n = rank * ( global_n / num_procs); - - if (global_n % num_procs > rank) - { - local_n++; - first_n += rank; - } - else - { - first_n += (global_n % num_procs); - } - - Vector v(global_n); - ParVector v_par(global_n, local_n); - - v.set_const_value(1.0); - v_par.set_const_value(1.0); - - Vector& v_par_l = v_par.local; - for (int i = 0; i < local_n; i++) - { - ASSERT_EQ( v[first_n+i], v_par_l[i] ); - //EXPECT_EQ( v[first_n+i], v_par_l[i] ); - //EXPECT_DOUBLE_EQ( v[first_n+i], v_par_l[i] ); - //EXPECT_FLOAT_EQ( v[first_n+i], v_par_l[i] ); - } - - for (int i = 0; i < global_n; i++) - { - srand(i); - v[i] = ((double)rand()) / RAND_MAX; - } - for (int i = 0; i < local_n; i++) - { - 
srand(i+first_n); - v_par_l[i] = ((double)rand()) / RAND_MAX; - } - - for (int i = 0; i < local_n; i++) - { - ASSERT_EQ(v[first_n+i], v_par_l[i]); - } - -} // end of TEST(ParVectorTest, TestsInCore) // - diff --git a/raptor/core/tests/test_tap_comm.cpp b/raptor/core/tests/test_tap_comm.cpp deleted file mode 100644 index b20c0134..00000000 --- a/raptor/core/tests/test_tap_comm.cpp +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor/raptor.hpp" -#include "raptor/tests/compare.hpp" - -using namespace raptor; - -int main(int argc, char** argv) -{ - MPI_Init(&argc, &argv); - ::testing::InitGoogleTest(&argc, argv); - int temp=RUN_ALL_TESTS(); - MPI_Finalize(); - return temp; - -} // end of main() // -TEST(TAPCommTest, TestsInCore) -{ - int rank, num_procs; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &num_procs); - - double eps = 0.001; - double theta = M_PI / 8.0; - int grid[2] = {25, 25}; - double* stencil = diffusion_stencil_2d(eps, theta); - std::vector tap_recv; - std::vector par_recv; - std::vector tap_simp_recv; - - ParCSRMatrix* A = par_stencil_grid(stencil, grid, 2); - //A->tap_comm = new TAPComm(A->partition, A->off_proc_column_map); - //TAPComm* simple_tap = new TAPComm(A->partition, A->off_proc_column_map, false); - A->init_tap_communicators(MPI_COMM_WORLD); - - ParVector x(A->global_num_rows, A->local_num_rows); - ParCSRMatrix* B = A->copy(); - - for (int i = 0; i < A->local_num_rows; i++) - { - x[i] = A->local_row_map[i]; - } - tap_recv = A->tap_comm->communicate(x); - tap_simp_recv = A->tap_mat_comm->communicate(x); - par_recv = A->comm->communicate(x); - ASSERT_EQ(tap_recv.size(), par_recv.size()); - ASSERT_EQ(tap_recv.size(), tap_simp_recv.size()); - for (int i = 0; i < (int)par_recv.size(); i++) - { - ASSERT_NEAR(par_recv[i], tap_recv[i], zero_tol); - ASSERT_NEAR(tap_recv[i], 
tap_simp_recv[i], zero_tol); - } - - x.set_rand_values(); - tap_recv = A->tap_comm->communicate(x); - tap_simp_recv = A->tap_mat_comm->communicate(x); - par_recv = A->comm->communicate(x); - ASSERT_EQ(tap_recv.size(), par_recv.size()); - ASSERT_EQ(tap_simp_recv.size(), tap_recv.size()); - for (int i = 0; i < (int)par_recv.size(); i++) - { - ASSERT_NEAR(par_recv[i], tap_recv[i], zero_tol); - ASSERT_NEAR(tap_recv[i], tap_simp_recv[i], zero_tol); - } - - CSRMatrix* recv_mat = A->comm->communicate(B); - CSRMatrix* tap_recv_mat = A->tap_comm->communicate(B); - CSRMatrix* tap_recv_simp_mat = A->tap_mat_comm->communicate(B); - compare(recv_mat, tap_recv_mat); - compare(tap_recv_mat, tap_recv_simp_mat); - delete recv_mat; - delete tap_recv_mat; - delete tap_recv_simp_mat; - - delete[] stencil; - delete A; - - -} // end of TEST(TAPCommTest, TestsInCore) // diff --git a/raptor/core/tests/test_transpose.cpp b/raptor/core/tests/test_transpose.cpp deleted file mode 100644 index 2dcd22de..00000000 --- a/raptor/core/tests/test_transpose.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor/raptor.hpp" -#include "raptor/tests/compare.hpp" - -using namespace raptor; - - -int main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); - -} // end of main() // - -TEST(MatrixTest, TestsInCore) -{ - CSRMatrix* A = readMatrix("../../../../test_data/aniso.pm"); - CSRMatrix* AT_py = readMatrix("../../../../test_data/aniso_T.pm"); - CSRMatrix* AT = (CSRMatrix*) A->transpose(); - A->sort(); - AT->sort(); - AT_py->sort(); - compare(AT, AT_py); - delete A; - delete AT_py; - delete AT; - - A = readMatrix("../../../../test_data/laplacian.pm"); - AT_py = readMatrix("../../../../test_data/laplacian_T.pm"); - AT = (CSRMatrix*) A->transpose(); - A->sort(); - AT->sort(); - AT_py->sort(); - compare(AT, AT_py); - 
delete A; - delete AT_py; - delete AT; - - -} // end of TEST(MatrixTest, TestsInCore) // - - diff --git a/raptor/core/topology.hpp b/raptor/core/topology.hpp deleted file mode 100644 index ab8c772e..00000000 --- a/raptor/core/topology.hpp +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#ifndef TOPOLOGY_HPP -#define TOPOLOGY_HPP - -#include -#include -#include - -#include "types.hpp" - -/************************************************************** - ***** Topology Class - ************************************************************** - ***** This class holds information about the topology of - ***** the parallel computer on which Raptor is being run - ***** - ***** Attributes - ***** ------------- - ***** global_num_indices : index_t - ***** Number of rows to be partitioned - ***** first_local_idx : index_t - ***** First global index of a row in partition local to rank - ***** local_num_indices : index_t - ***** Number of rows local to rank's partition - ***** - ***** Methods - ***** --------- - **************************************************************/ -namespace raptor -{ - class Topology - { - public: - Topology(int _PPN = 16, int _standard_rank_ordering = 1) - { - int rank, num_procs; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - RAPtor_MPI_Comm_size(RAPtor_MPI_COMM_WORLD, &num_procs); - - int rank_node; - - char* proc_layout_c = getenv("RAPtor_MPICH_RANK_REORDER_METHOD"); - char* PPN_c = getenv("PPN"); - if (PPN_c) - { - PPN = atoi(PPN_c); - } - else - { - PPN = _PPN; - } - - if (proc_layout_c) - { - rank_ordering = atoi(proc_layout_c); - } - else - { - rank_ordering = _standard_rank_ordering; - } - - num_nodes = num_procs / PPN; - if (num_procs % PPN) num_nodes++; - rank_node = get_node(rank); - - // Create intra-node communicator - RAPtor_MPI_Comm_split(RAPtor_MPI_COMM_WORLD, rank_node, rank, &local_comm); - num_shared = 0; - } - - 
~Topology() - { - RAPtor_MPI_Comm_free(&local_comm); - } - - int get_node(int proc) - { - if (rank_ordering == 0) - { - return proc % num_nodes; - } - else if (rank_ordering == 1) - { - return proc / PPN; - } - else if (rank_ordering == 2) - { - if ((proc / num_nodes) % 2 == 0) - { - return proc % num_nodes; - } - else - { - return num_nodes - (proc % num_nodes) - 1; - } - } - else - { - int rank; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - if (rank == 0) - { - printf("This RAPtor_MPI rank ordering is not supported!\n"); - } - return -1; - } - } - - int get_local_proc(int proc) - { - if (rank_ordering == 0 || rank_ordering == 2) - { - return proc / num_nodes; - } - else if (rank_ordering == 1) - { - return proc % PPN; - } - else - { - int rank; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - if (rank == 0) - { - printf("This RAPtor_MPI rank ordering is not supported!\n"); - } - return -1; - } - } - - int get_global_proc(int node, int local_proc) - { - if (rank_ordering == 0) - { - return local_proc * num_nodes + node; - } - else if (rank_ordering == 1) - { - return local_proc + (node * PPN); - } - else if (rank_ordering == 2) - { - if (local_proc % 2 == 0) - { - return local_proc * num_nodes + node; - } - else - { - return local_proc * num_nodes + num_nodes - node - 1; - } - } - else - { - int rank; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - if (rank == 0) - { - printf("This RAPtor_MPI rank ordering is not supported!\n"); - } - return -1; - } - } - - int PPN; - int rank_ordering; - int num_shared; - int num_nodes; - - RAPtor_MPI_Comm local_comm; - }; -} - -#endif diff --git a/raptor/core/types.hpp b/raptor/core/types.hpp deleted file mode 100644 index 850a4627..00000000 --- a/raptor/core/types.hpp +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#ifndef RAPTOR_CORE_TYPES_HPP_ -#define RAPTOR_CORE_TYPES_HPP_ - -#include 
-#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define zero_tol 1e-16 -#define RAPtor_MPI_INDEX_T MPI_INT -#define RAPtor_MPI_DATA_T MPI_DOUBLE - -// Defines for CF splitting and aggregation -#define TmpSelection 4 -#define NewSelection 3 -#define NewUnselection 2 -#define Selected 1 -#define Unselected 0 -#define Unassigned -1 -#define NoNeighbors -2 - - -// Global Timing Variables -struct PairData -{ - double val; - int index; -}; - -namespace raptor -{ - using data_t = double; - using index_t = int; - enum strength_t {Classical, Symmetric}; - enum format_t {COO, CSR, CSC, BCOO, BSR, BSC}; - enum coarsen_t {RS, CLJP, Falgout, PMIS, HMIS}; - enum interp_t {Direct, ModClassical, Extended}; - enum agg_t {MIS}; - enum prolong_t {JacobiProlongation}; - enum relax_t {Jacobi, SOR, SSOR}; - - template - U sum_func(const U& a, const T&b) - { - return a + b; - } - - template - U max_func(const U& a, const T&b) - { - if (a > b) - { - return a; - } - else - { - return b; - } - } -} - -#endif diff --git a/raptor/core/utilities.hpp b/raptor/core/utilities.hpp deleted file mode 100644 index e586826b..00000000 --- a/raptor/core/utilities.hpp +++ /dev/null @@ -1,211 +0,0 @@ -#ifndef RAPTOR_CORE_UTILITIES_HPP -#define RAPTOR_CORE_UTILITIES_HPP - -#include -#include -#include - -#include "types.hpp" - -// BLAS LU routine that is used for coarse solve -extern "C" void dgetrf_(int* dim1, int* dim2, double* a, int* lda, - int* ipiv, int* info); -extern "C" void dgetrs_(char *TRANS, int *N, int *NRHS, double *A, - int *LDA, int *IPIV, double *B, int *LDB, int *INFO ); - -namespace raptor { -template -void vec_sort(std::vector& vec1, std::vector& vec2, int start = 0, int end = -1) -{ - vec1.shrink_to_fit(); - vec2.shrink_to_fit(); - - int k, prev_k; - int n = vec1.size(); - if (end < 0) end = n; - int size = end - start; - - std::vector p(size); - std::vector done(size, false); - - 
std::iota(p.begin(), p.end(), 0); - std::sort(p.begin(), p.end(), - [&](const int i, const int j) - { - return vec1[i+start] < vec1[j+start]; - }); - for (int i = 0; i < size; i++) - { - if (done[i]) continue; - done[i] = true; - prev_k = i; - k = p[i]; - while (i != k) - { - std::swap(vec1[prev_k + start], vec1[k + start]); - std::swap(vec2[prev_k + start], vec2[k + start]); - done[k] = true; - prev_k = k; - k = p[k]; - } - } -} - -template -void vec_sort(std::vector& vec1, std::vector& vec2, - std::vector& vec3, - int start = 0, int end = -1) -{ - vec1.shrink_to_fit(); - vec2.shrink_to_fit(); - vec3.shrink_to_fit(); - - int k, prev_k; - int n = vec1.size(); - if (end < 0) end = n; - int size = end - start; - - std::vector p(size); - std::vector done(size, false); - - std::iota(p.begin(), p.end(), 0); - std::sort(p.begin(), p.end(), - [&](const int i, const int j) - { - int idx1 = i + start; - int idx2 = j + start; - if (vec1[idx1] == vec1[idx2]) - return vec2[idx1] < vec2[idx2]; - else - return vec1[idx1] < vec1[idx2]; - }); - for (int i = 0; i < size; i++) - { - if (done[i]) continue; - done[i] = true; - prev_k = i; - k = p[i]; - while (i != k) - { - std::swap(vec1[prev_k + start], vec1[k + start]); - std::swap(vec2[prev_k + start], vec2[k + start]); - std::swap(vec3[prev_k + start], vec3[k + start]); - done[k] = true; - prev_k = k; - k = p[k]; - } - } -} - - -enum extents : std::size_t { - dynamic_extent = std::numeric_limits::max() -}; -template -struct extent_storage -{ - extent_storage(std::size_t) {} - constexpr std::size_t value() const { return E; } -}; -template <> -struct extent_storage -{ - constexpr std::size_t value() const { return e; } - std::size_t e; -}; - - -template -struct span { - using element_type = T; - using value_type = typename std::remove_cv::type; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - using pointer = T*; - using const_pointer = const T*; - using reference = T&; - using const_reference = const T&; 
- using iterator = T*; - using reverse_iterator = std::reverse_iterator; - - static constexpr std::size_t extent = Extent; - - - template::type> - span() : b(nullptr), ext{0} {} - - constexpr span(pointer p, size_type s) : b(p), ext{s} {} - - constexpr span(std::vector & v) : - span(v.data(), v.size()) {} - - constexpr iterator begin() const noexcept { - return b; - } - - constexpr iterator end() const noexcept { - return b + size(); - } - - constexpr reverse_iterator rbegin() const noexcept { - return reverse_iterator(end()); - } - - constexpr reverse_iterator rend() const noexcept { - return reverse_iterator(begin()); - } - - constexpr reference front() const { - return *b; - } - - constexpr reference back() const { - return *(b + (size() - 1)); - } - - constexpr reference operator[](size_type idx) const { - return begin()[idx]; - } - - constexpr pointer data() const noexcept { - return b; - } - - constexpr size_type size() const noexcept { - return ext.value(); - } - - constexpr size_type size_bytes() const noexcept { - return sizeof(T)*size(); - } - - [[nodiscard]] constexpr bool empty() const noexcept { - return size() == 0; - } - - template - constexpr span first() const noexcept { - return {b, Count}; - } - - constexpr span first(size_type count) const noexcept { - return {data(), count}; - } - - template - constexpr span last() const noexcept { - return {data() + (size() - Count), Count}; - } - - constexpr span last(size_type count) const noexcept { - return {data() + (size() - count), count}; - } - -protected: - pointer b; - extent_storage ext; -}; - -} -#endif diff --git a/raptor/core/vector.cpp b/raptor/core/vector.cpp deleted file mode 100644 index 5154a139..00000000 --- a/raptor/core/vector.cpp +++ /dev/null @@ -1,167 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#include "vector.hpp" - -using namespace raptor; - 
-/************************************************************** -***** Vector Set Constant Value -************************************************************** -***** Initializes the vector to a constant value -***** -***** Parameters -***** ------------- -***** alpha : data_t -***** Constant value to set each element of vector to -**************************************************************/ -void Vector::set_const_value(data_t alpha) -{ - for (index_t i = 0; i < size(); i++) - { - values[i] = alpha; - } -} - -/************************************************************** -***** Vector Set Random Values -************************************************************** -***** Initializes each element of the vector to a random -***** value -**************************************************************/ -void Vector::set_rand_values() -{ - srand(time(NULL)); - for (index_t i = 0; i < size(); i++) - { - values[i] = ((double)rand()) / RAND_MAX; - } -} - -/************************************************************** -***** Vector AXPY -************************************************************** -***** Multiplies the vector x by a constant, alpha, and then -***** sums each element with corresponding local entry -***** -***** Parameters -***** ------------- -***** x : Vector& -***** Vector to be summed with -***** alpha : data_t -***** Constant value to multiply each element of vector by -**************************************************************/ -void Vector::axpy(Vector& x, data_t alpha) -{ - for (index_t i = 0; i < size(); i++) - { - values[i] += x.values[i]*alpha; - } -} - -/************************************************************** -***** Vector Copy -************************************************************** -***** Copies each vector value of y into values -***** -***** Parameters -***** ------------- -***** y : Vector& -***** Vector to be copied. 
Must have same local rows -***** and same first row -**************************************************************/ -void Vector::copy(const Vector& y) -{ - if (!storage) storage = std::make_shared(); - resize(y.size()); - std::copy(y.values.begin(), y.values.end(), values.begin()); -} - -/************************************************************** -***** Vector Scale -************************************************************** -***** Multiplies each element of the vector by a constant value -***** -***** Parameters -***** ------------- -***** alpha : data_t -***** Constant value to set multiply element of vector by -**************************************************************/ -void Vector::scale(data_t alpha) -{ - for (index_t i = 0; i < size(); i++) - { - values[i] *= alpha; - } -} - -/************************************************************** -***** Vector Norm -************************************************************** -***** Calculates the P norm of the vector (for a given P) -***** -***** Parameters -***** ------------- -***** p : index_t -***** Determines which p-norm to calculate -**************************************************************/ -data_t Vector::norm(index_t p) -{ - data_t result = 0.0; - double val; - for (index_t i = 0; i < size(); i++) - { - val = values[i]; - if (fabs(val) > zero_tol) - result += pow(val, p); - } - return pow(result, 1.0/p); -} - -/************************************************************** -***** Print Vector -************************************************************** -***** Prints all nonzero elements in vector -***** -***** Parameters -***** ------------- -***** vec_name : const char* (optional) -***** Name to be printed. Default prints Vec[%d] = %e. 
-**************************************************************/ -void Vector::print(const char* vec_name) -{ - printf("Size = %d\n", size()); - for (int i = 0; i < size(); i++) - { - if (fabs(values[i]) > zero_tol) - printf("%s[%d] = %e\n", vec_name, i, values[i]); - } -} - -/************************************************************** -***** Vector Element Access -************************************************************** -***** Function overload for element access -***** -***** Returns -***** ------------ -***** data_t& element at position passed -**************************************************************/ -data_t& Vector::operator[](const int index) -{ - return values[index]; -} - - -data_t Vector::inner_product(Vector& x) -{ - data_t result = 0.0; - - for (int i = 0; i < size(); i++) - { - result += values[i] * x[i]; - } - - return result; -} - - diff --git a/raptor/core/vector.hpp b/raptor/core/vector.hpp deleted file mode 100644 index 12f235fc..00000000 --- a/raptor/core/vector.hpp +++ /dev/null @@ -1,224 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#ifndef RAPTOR_CORE_VECTOR_HPP_ -#define RAPTOR_CORE_VECTOR_HPP_ - -#include - -#include "types.hpp" -#include "utilities.hpp" - -// Vector Class -// -// This class constructs a vector, supporting simple linear -// algebra operations. 
-// -// Attributes -// ------------- -// values : std::vector -// stl vector of vector values -// size : index_t -// Dimension of vector -// -// Methods -// ------- -// set_const_value(data_t alpha) -// Sets the vector to a constant value -// set_rand_values() -// Sets each element of the vector to a random value -// axpy(Vector& y, data_t alpha) -// Multiplies each element by a constant, alpha, and then -// adds corresponding values from y -// scale(data_t alpha) -// Multiplies entries of vector by a constant -// norm(index_t p) -// Calculates the p-norm of the vector -// print() -// Prints the nonzero values and positions -// data() -// Returns the data values as a data_t* -// -namespace raptor { - -class Vector -{ -public: - /************************************************************** - ***** Vector Class Constructor - ************************************************************** - ***** Initializes an empty vector of the given size - ***** - ***** Parameters - ***** ------------- - ***** len : index_t - ***** Size of the vector - **************************************************************/ - Vector(int len) : - storage(std::make_shared(len)), - values(*storage) - {} - - /************************************************************** - ***** Vector Class Constructor - ************************************************************** - ***** Initializes an empty vector without setting the size - **************************************************************/ - Vector() : - storage(std::make_shared()), - values(*storage) - {} - - Vector(double * base, std::size_t len) : - values(base, len) {} - - Vector(const Vector& v) - { - copy(v); - } - - void resize(std::size_t len) - { - if (owns_data()) { - storage->resize(len); - values = span(*storage); - } else { - assert(len <= values.size()); - values = values.first(len); - } - } - - bool owns_data() { - return static_cast(storage); - } - - /************************************************************** - ***** 
Vector Set Constant Value - ************************************************************** - ***** Initializes the vector to a constant value - ***** - ***** Parameters - ***** ------------- - ***** alpha : data_t - ***** Constant value to set each element of vector to - **************************************************************/ - void set_const_value(data_t alpha); - - /************************************************************** - ***** Vector Set Random Values - ************************************************************** - ***** Initializes each element of the vector to a random - ***** value - **************************************************************/ - void set_rand_values(); - - /************************************************************** - ***** Vector AXPY - ************************************************************** - ***** Multiplies the vector by a constant, alpha, and then - ***** sums each element with corresponding entry of Y - ***** - ***** Parameters - ***** ------------- - ***** y : Vector& - ***** Vector to be summed with - ***** alpha : data_t - ***** Constant value to multiply each element of vector by - **************************************************************/ - void axpy(Vector& y, data_t alpha); - - /************************************************************** - ***** Vector Copy - ************************************************************** - ***** Copies each vector value of y into values - ***** - ***** Parameters - ***** ------------- - ***** y : Vector& - ***** Vector to be copied - **************************************************************/ - void copy(const Vector& y); - - /************************************************************** - ***** Vector Scale - ************************************************************** - ***** Multiplies each element of the vector by a constant value - ***** - ***** Parameters - ***** ------------- - ***** alpha : data_t - ***** Constant value to set multiply element 
of vector by - **************************************************************/ - void scale(data_t alpha); - - /************************************************************** - ***** Vector Norm - ************************************************************** - ***** Calculates the P norm of the vector (for a given P) - ***** - ***** Parameters - ***** ------------- - ***** p : index_t - ***** Determines which p-norm to calculate - **************************************************************/ - data_t norm(index_t p); - - /************************************************************** - ***** Print Vector - ************************************************************** - ***** Prints all nonzero elements in vector - ***** - ***** Parameters - ***** ------------- - ***** vec_name : const char* (optional) - ***** Name to be printed. Default prints Vec[%d] = %e. - **************************************************************/ - void print(const char* vec_name = "Vec"); - - /************************************************************** - ***** Vector Element Access - ************************************************************** - ***** Function overload for element access - ***** - ***** Returns - ***** ------------ - ***** data_t& element at position passed - **************************************************************/ - data_t& operator[](const int index); - - /************************************************************** - ***** Vector Data - ************************************************************** - ***** Returns pointer to vector entries - ***** - ***** Returns - ***** ------------- - ***** data_t* - ***** Pointer to values of vector - **************************************************************/ - data_t* data() - { - return values.data(); - } - - index_t size() const - { - return values.size(); - } - - data_t inner_product(Vector& x); - - void set_base(double *base) { - auto sz = size(); - if (storage) storage.reset(); - values = span(base, sz); 
- } - - using storage_type = std::vector; - std::shared_ptr storage; - span values; -}; - -} - - -#endif diff --git a/raptor/gallery/CMakeLists.txt b/raptor/gallery/CMakeLists.txt deleted file mode 100644 index 9ee05889..00000000 --- a/raptor/gallery/CMakeLists.txt +++ /dev/null @@ -1,47 +0,0 @@ -# Include the directory itself as a path to include directories -set(CMAKE_INCLUDE_CURRENT_DIR ON) - -# Create a variable called gallery_SOURCES containing all .cpp files: -if (WITH_MPI) - set(par_gallery_HEADERS - gallery/par_stencil.hpp - gallery/par_random.hpp - gallery/par_matrix_IO.hpp - gallery/par_matrix_market.hpp - ) - set(par_gallery_SOURCES - gallery/par_stencil.cpp - gallery/par_random.cpp - gallery/par_matrix_IO.cpp - gallery/par_matrix_market.cpp - ) -else () - set(par_gallery_HEADERS - "" - ) - set(par_gallery_SOURCES - "" - ) -endif() - -set(gallery_HEADERS - gallery/diffusion.hpp - gallery/laplacian27pt.hpp - gallery/stencil.hpp - gallery/random.hpp - gallery/matrix_IO.hpp - gallery/matrix_market.hpp - ${par_gallery_HEADERS} - PARENT_SCOPE) - -set(gallery_SOURCES - gallery/diffusion.cpp - gallery/laplacian27pt.cpp - gallery/stencil.cpp - gallery/random.cpp - gallery/matrix_IO.cpp - gallery/matrix_market.cpp - ${par_gallery_SOURCES} - PARENT_SCOPE) - - diff --git a/raptor/gallery/diffusion.cpp b/raptor/gallery/diffusion.cpp deleted file mode 100644 index 470fd626..00000000 --- a/raptor/gallery/diffusion.cpp +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "diffusion.hpp" - -namespace raptor { -// diffusion_stencil_2d -// -// Generate a diffusion stencil -// -// Supports isotropic diffusion (FE,FD), anisotropic diffusion (FE, FD), and -// rotated anisotropic diffusion (FD). 
-// -// Rotated Anisotropic diffusion in 2d of the form: -// -// -div Q A Q^T grad u -// -// Q = [cos(theta) -sin(theta)] -// [sin(theta) cos(theta)] -// -// A = [1 0 ] -// [0 eps ] -// -// Parameters -// ---------- -// epsilon : double, optional -// Anisotropic diffusion coefficient: -div A grad u, -// where A = [1 0; 0 epsilon]. The default is isotropic, epsilon=1.0 -// theta : double, optional -// Rotation angle `theta` in radians defines -div Q A Q^T grad, -// where Q = [cos(`theta`) -sin(`theta`); sin(`theta`) cos(`theta`)]. -// type : {'FE','FD'} -// Specifies the discretization as Q1 finite element (FE) or 2nd order -// finite difference (FD) -// The default is `theta` = 0.0 -// -// Returns -// ------- -// stencil : numpy array -// A 3x3 diffusion stencil -// -// See Also -// -------- -// stencil_grid -// -// Notes -// ----- -// Not all combinations are supported. -// -// TODO -// ---- -// Add FD option -// -data_t* diffusion_stencil_2d(data_t eps, data_t theta) -{ - data_t* stencil = new data_t[9]; - - data_t C = cos(theta); - data_t S = sin(theta); - data_t CS = C*S; - data_t CC = C*C; - data_t SS = S*S; - - data_t val1 = ((-1*eps - 1)*CC + (-1*eps - 1)*SS + ( 3*eps - 3)*CS) / 6.0; - data_t val2 = (( 2*eps - 4)*CC + (-4*eps + 2)*SS) / 6.0; - data_t val3 = ((-1*eps - 1)*CC + (-1*eps - 1)*SS + (-3*eps + 3)*CS) / 6.0; - data_t val4 = ((-4*eps + 2)*CC + ( 2*eps - 4)*SS) / 6.0; - data_t val5 = (( 8*eps + 8)*CC + ( 8*eps + 8)*SS) / 6.0; - - stencil[0] = val1; - stencil[1] = val2; - stencil[2] = val3; - stencil[3] = val4; - stencil[4] = val5; - stencil[5] = val4; - stencil[6] = val3; - stencil[7] = val2; - stencil[8] = val1; - - return stencil; -} - -} diff --git a/raptor/gallery/diffusion.hpp b/raptor/gallery/diffusion.hpp deleted file mode 100644 index bc49904a..00000000 --- a/raptor/gallery/diffusion.hpp +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - 
-#ifndef DIFFUSION_HPP -#define DIFFUSION_HPP - -#include "raptor/core/types.hpp" - -namespace raptor { -// diffusion_stencil_2d -// -// Generate a diffusion stencil -// -// Supports isotropic diffusion (FE,FD), anisotropic diffusion (FE, FD), and -// rotated anisotropic diffusion (FD). -// -// Rotated Anisotropic diffusion in 2d of the form: -// -// -div Q A Q^T grad u -// -// Q = [cos(theta) -sin(theta)] -// [sin(theta) cos(theta)] -// -// A = [1 0 ] -// [0 eps ] -// -// Parameters -// ---------- -// epsilon : double, optional -// Anisotropic diffusion coefficient: -div A grad u, -// where A = [1 0; 0 epsilon]. The default is isotropic, epsilon=1.0 -// theta : double, optional -// Rotation angle `theta` in radians defines -div Q A Q^T grad, -// where Q = [cos(`theta`) -sin(`theta`); sin(`theta`) cos(`theta`)]. -// type : {'FE','FD'} -// Specifies the discretization as Q1 finite element (FE) or 2nd order -// finite difference (FD) -// The default is `theta` = 0.0 -// -// Returns -// ------- -// stencil : numpy array -// A 3x3 diffusion stencil -// -// See Also -// -------- -// stencil_grid -// -// Notes -// ----- -// Not all combinations are supported. 
-// -// TODO -// ---- -// Add FD option -// -data_t* diffusion_stencil_2d(data_t eps = 1.0, data_t theta = 0.0); - -} - -#endif diff --git a/raptor/gallery/laplacian27pt.cpp b/raptor/gallery/laplacian27pt.cpp deleted file mode 100644 index 012037a3..00000000 --- a/raptor/gallery/laplacian27pt.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "laplacian27pt.hpp" - -namespace raptor { -// 27 Point Laplacian Stencil -// -// Generate a 27-point laplacian stencil -// -// Returns -// ------- -// stencil : numpy array -// A 3x3 diffusion stencil -// -// See Also -// -------- -// stencil_grid -// -// - -data_t* laplace_stencil_27pt() -{ - data_t* stencil = new data_t[27]; - - for (int i = 0; i < 27; i++) - { - stencil[i] = -1; - } - - stencil[13] = 26; - - return stencil; -} - -} diff --git a/raptor/gallery/laplacian27pt.hpp b/raptor/gallery/laplacian27pt.hpp deleted file mode 100644 index 3a843ec2..00000000 --- a/raptor/gallery/laplacian27pt.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#ifndef LAPLACIAN27PT_HPP -#define LAPLACIAN27PT_HPP - -//#include -#include "raptor/core/types.hpp" -#include - -namespace raptor { - -// 27 Point Laplacian Stencil -// -// Generate a 27-point laplacian stencil -// -// Returns -// ------- -// stencil : numpy array -// A 3x3 diffusion stencil -// -// See Also -// -------- -// stencil_grid -// -// - -data_t* laplace_stencil_27pt(); - -} -#endif diff --git a/raptor/gallery/matrix_IO.cpp b/raptor/gallery/matrix_IO.cpp deleted file mode 100644 index e1eb166c..00000000 --- a/raptor/gallery/matrix_IO.cpp +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "matrix_IO.hpp" -#include -#include -#include 
-#include // std::cout -#include // std::ifstream - -namespace { -bool little_endian() -{ - int num = 1; - return (*(char *)&num == 1); -} - -template -void endian_swap(T *objp) -{ - unsigned char *memp = reinterpret_cast(objp); - std::reverse(memp, memp + sizeof(T)); -} -} - -namespace raptor { -CSRMatrix* readMatrix(const char* filename) -{ - CSRMatrix* A; - - int32_t code; - int32_t n_rows; - int32_t n_cols; - int32_t nnz; - int32_t idx; - double val; - - int sizeof_dbl = sizeof(val); - int sizeof_int32 = sizeof(code); - bool is_little_endian = false; - - std::ifstream ifs (filename, std::ifstream::binary); - ifs.read(reinterpret_cast(&code), sizeof_int32); - ifs.read(reinterpret_cast(&n_rows), sizeof_int32); - ifs.read(reinterpret_cast(&n_cols), sizeof_int32); - ifs.read(reinterpret_cast(&nnz), sizeof_int32); - - if (code != PETSC_MAT_CODE) - { - is_little_endian = true; - endian_swap(&code); - endian_swap(&n_rows); - endian_swap(&n_cols); - endian_swap(&nnz); - } - - assert(code == PETSC_MAT_CODE); - - A = new CSRMatrix(n_rows, n_cols, nnz); - - int displ = 0; - A->idx1[0] = 0; - if (is_little_endian) - { - for (int32_t i = 0; i < n_rows; i++) - { - ifs.read(reinterpret_cast(&idx), sizeof_int32); - endian_swap(&idx); - displ += idx; - A->idx1[i+1] = displ; - } - for (int32_t i = 0; i < nnz; i++) - { - ifs.read(reinterpret_cast(&idx), sizeof_int32); - endian_swap(&idx); - A->idx2.emplace_back(idx); - } - for (int32_t i = 0; i < nnz; i++) - { - ifs.read(reinterpret_cast(&val), sizeof_dbl); - endian_swap(&val); - A->vals.emplace_back(val); - } - } - else - { - for (int32_t i = 0; i < n_rows; i++) - { - ifs.read(reinterpret_cast(&idx), sizeof_int32); - displ += idx; - A->idx1[i+1] = displ; - } - for (int32_t i = 0; i < nnz; i++) - { - ifs.read(reinterpret_cast(&idx), sizeof_int32); - A->idx2.emplace_back(idx); - } - for (int32_t i = 0; i < nnz; i++) - { - ifs.read(reinterpret_cast(&val), sizeof_dbl); - endian_swap(&val); - A->vals.emplace_back(val); - } - } - 
A->nnz = A->idx2.size(); - - ifs.close(); - - return A; - -} -} diff --git a/raptor/gallery/matrix_IO.hpp b/raptor/gallery/matrix_IO.hpp deleted file mode 100644 index b48e1578..00000000 --- a/raptor/gallery/matrix_IO.hpp +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#ifndef MATRIX_IO_H -#define MATRIX_IO_H - -#define PETSC_MAT_CODE 1211216 - -//#include -#include -#include -#include -#include - -#include "raptor/core/matrix.hpp" -#include "raptor/core/types.hpp" - -namespace raptor { - -CSRMatrix* readMatrix(const char* filename); - -} -#endif - diff --git a/raptor/gallery/matrix_market.cpp b/raptor/gallery/matrix_market.cpp deleted file mode 100644 index 822ac102..00000000 --- a/raptor/gallery/matrix_market.cpp +++ /dev/null @@ -1,538 +0,0 @@ -/* -* Matrix Market I/O library for ANSI C -* -* See http://math.nist.gov/MatrixMarket for details. -* -* -*/ - -#include -#include -#include -#include - -#include "matrix_market.hpp" - -namespace raptor { - -// Declare Private Methods -char *mm_strdup(const char *s); -int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J, - double **val, MM_typecode *matcode); - -CSRMatrix* read_mm(const char *fname) -{ - FILE *f; - MM_typecode matcode; - int M, N, nz; - int i; - int row, col; - double val; - - if ((f = fopen(fname, "r")) == NULL) - return NULL; - - - if (mm_read_banner(f, &matcode) != 0) - { - printf("mm_read_unsymetric: Could not process Matrix Market banner "); - printf(" in file [%s]\n", fname); - return NULL; - } - - - - if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) && - mm_is_sparse(matcode))) - { - fprintf(stderr, "Sorry, this application does not support "); - fprintf(stderr, "Market Market type: [%s]\n", - mm_typecode_to_str(matcode)); - return NULL; - } - - /* find out size of sparse matrix: M, N, nz .... 
*/ - - if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0) - { - fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n"); - return NULL; - } - - /* reseve memory for matrices */ - COOMatrix* A = new COOMatrix(M, N, nz); - - /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ - /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ - /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */ - - for (i=0; iadd_value(row - 1, col - 1, val); - } - fclose(f); - - CSRMatrix* A_csr = A->to_CSR(); - delete A; - - return A_csr; -} - - -void write_mm(CSRMatrix* A, const char *fname) -{ - FILE *f; - MM_typecode matcode; - int start, end; - - if ((f = fopen(fname, "w")) == NULL) - return; - - mm_initialize_typecode(&matcode); - mm_set_matrix(&matcode); - mm_set_coordinate(&matcode); - mm_set_real(&matcode); - - mm_write_banner(f, matcode); - fprintf(f, "%%\n"); - mm_write_mtx_crd_size(f, A->n_rows, A->n_cols, A->nnz); - for (int i = 0; i < A->n_rows; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - for (int j = start; j < end; j++) - { - fprintf(f, "%d %d %2.15e\n", i+1, A->idx2[j]+1, A->vals[j]); - } - } - fclose(f); -} - -int mm_is_valid(MM_typecode matcode) -{ - if (!mm_is_matrix(matcode)) return 0; - if (mm_is_dense(matcode) && mm_is_pattern(matcode)) return 0; - if (mm_is_real(matcode) && mm_is_hermitian(matcode)) return 0; - if (mm_is_pattern(matcode) && (mm_is_hermitian(matcode) || - mm_is_skew(matcode))) return 0; - return 1; -} - -int mm_read_banner(FILE *f, MM_typecode *matcode) -{ - char line[MM_MAX_LINE_LENGTH]; - char banner[MM_MAX_TOKEN_LENGTH]; - char mtx[MM_MAX_TOKEN_LENGTH]; - char crd[MM_MAX_TOKEN_LENGTH]; - char data_type[MM_MAX_TOKEN_LENGTH]; - char storage_scheme[MM_MAX_TOKEN_LENGTH]; - char *p; - - mm_clear_typecode(matcode); - - if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL) - return MM_PREMATURE_EOF; - - if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type, - storage_scheme) != 5) - 
return MM_PREMATURE_EOF; - - for (p=mtx; *p!='\0'; *p=tolower(*p),p++); /* convert to lower case */ - for (p=crd; *p!='\0'; *p=tolower(*p),p++); - for (p=data_type; *p!='\0'; *p=tolower(*p),p++); - for (p=storage_scheme; *p!='\0'; *p=tolower(*p),p++); - - /* check for banner */ - if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0) - return MM_NO_HEADER; - - /* first field should be "mtx" */ - if (strcmp(mtx, MM_MTX_STR) != 0) - return MM_UNSUPPORTED_TYPE; - mm_set_matrix(matcode); - - - /* second field describes whether this is a sparse matrix (in coordinate - storgae) or a dense array */ - - - if (strcmp(crd, MM_SPARSE_STR) == 0) - mm_set_sparse(matcode); - else - if (strcmp(crd, MM_DENSE_STR) == 0) - mm_set_dense(matcode); - else - return MM_UNSUPPORTED_TYPE; - - - /* third field */ - - if (strcmp(data_type, MM_REAL_STR) == 0) - mm_set_real(matcode); - else - if (strcmp(data_type, MM_COMPLEX_STR) == 0) - mm_set_complex(matcode); - else - if (strcmp(data_type, MM_PATTERN_STR) == 0) - mm_set_pattern(matcode); - else - if (strcmp(data_type, MM_INT_STR) == 0) - mm_set_integer(matcode); - else - return MM_UNSUPPORTED_TYPE; - - - /* fourth field */ - - if (strcmp(storage_scheme, MM_GENERAL_STR) == 0) - mm_set_general(matcode); - else - if (strcmp(storage_scheme, MM_SYMM_STR) == 0) - mm_set_symmetric(matcode); - else - if (strcmp(storage_scheme, MM_HERM_STR) == 0) - mm_set_hermitian(matcode); - else - if (strcmp(storage_scheme, MM_SKEW_STR) == 0) - mm_set_skew(matcode); - else - return MM_UNSUPPORTED_TYPE; - - - return 0; -} - -int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz) -{ - if (fprintf(f, "%d %d %d\n", M, N, nz) != 3) - return MM_COULD_NOT_WRITE_FILE; - else - return 0; -} - -int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz ) -{ - char line[MM_MAX_LINE_LENGTH]; - int num_items_read; - - /* set return null parameter values, in case we exit with errors */ - *M = *N = *nz = 0; - - /* now continue scanning until you reach the 
end-of-comments */ - do - { - if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL) - return MM_PREMATURE_EOF; - }while (line[0] == '%'); - - /* line[] is either blank or has M,N, nz */ - if (sscanf(line, "%d %d %d", M, N, nz) == 3) - return 0; - - else - do - { - num_items_read = fscanf(f, "%d %d %d", M, N, nz); - if (num_items_read == EOF) return MM_PREMATURE_EOF; - } - while (num_items_read != 3); - - return 0; -} - - -int mm_read_mtx_array_size(FILE *f, int *M, int *N) -{ - char line[MM_MAX_LINE_LENGTH]; - int num_items_read; - /* set return null parameter values, in case we exit with errors */ - *M = *N = 0; - - /* now continue scanning until you reach the end-of-comments */ - do - { - if (fgets(line,MM_MAX_LINE_LENGTH,f) == NULL) - return MM_PREMATURE_EOF; - }while (line[0] == '%'); - - /* line[] is either blank or has M,N, nz */ - if (sscanf(line, "%d %d", M, N) == 2) - return 0; - - else /* we have a blank line */ - do - { - num_items_read = fscanf(f, "%d %d", M, N); - if (num_items_read == EOF) return MM_PREMATURE_EOF; - } - while (num_items_read != 2); - - return 0; -} - -int mm_write_mtx_array_size(FILE *f, int M, int N) -{ - if (fprintf(f, "%d %d\n", M, N) != 2) - return MM_COULD_NOT_WRITE_FILE; - else - return 0; -} - - - -/*-------------------------------------------------------------------------*/ - -/******************************************************************/ -/* use when I[], J[], and val[]J, and val[] are already allocated */ -/******************************************************************/ - -int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[], - double val[], MM_typecode matcode) -{ - int i; - if (mm_is_complex(matcode)) - { - for (i=0; i -#include "limits.h" - -namespace { -bool little_endian() -{ - int num = 1; - return (*(char *)&num == 1); -} - -template -void endian_swap(T *objp) -{ - unsigned char *memp = reinterpret_cast(objp); - std::reverse(memp, memp + sizeof(T)); -} -} - -namespace raptor { -ParCSRMatrix* 
readParMatrix(const char* filename, - int local_num_rows, int local_num_cols, - int first_local_row, int first_local_col, - RAPtor_MPI_Comm comm) -{ - int rank, num_procs; - RAPtor_MPI_Comm_rank(comm, &rank); - RAPtor_MPI_Comm_size(comm, &num_procs); - - ParCSRMatrix* A = NULL; - - int64_t pos; - int32_t code; - int32_t global_num_rows; - int32_t global_num_cols; - int32_t global_nnz; - int32_t idx; - int n_items_read; - double val; - - bool is_little_endian = false; - - int ctr, size; - - int sizeof_dbl = sizeof(val); - int sizeof_int32 = sizeof(code); - - FILE* ifile = fopen(filename, "rb"); - if (fseek(ifile, 0, SEEK_SET)) printf("Error seeking beginning of file\n"); - - // Read code, and determine if little endian, or if long int - int32_t header[4]; - n_items_read = fread(header, sizeof_int32, 4, ifile); - code = header[0]; - global_num_rows = header[1]; - global_num_cols = header[2]; - global_nnz = header[3]; - if (code != PETSC_MAT_CODE) - { - endian_swap(&code); - endian_swap(&global_num_rows); - endian_swap(&global_num_cols); - endian_swap(&global_nnz); - is_little_endian = true; - } - - if (first_local_col >= 0) - { - A = new ParCSRMatrix(global_num_rows, global_num_cols, - local_num_rows, local_num_cols, - first_local_row, first_local_col); - } - else - { - A = new ParCSRMatrix(global_num_rows, global_num_cols); - } - - std::vector row_sizes; - std::vector col_indices; - std::vector vals; - std::vector proc_nnz(num_procs); - if (A->local_num_rows) - row_sizes.resize(A->local_num_rows); - int nnz = 0; - - // Find row sizes - pos = (4 + A->partition->first_local_row) * sizeof_int32; - if (fseek(ifile, pos, SEEK_SET)) printf("Error seeking pos\n"); - if (A->local_num_rows) - { - n_items_read = fread(row_sizes.data(), sizeof_int32, A->local_num_rows, ifile); - if (n_items_read == EOF) printf("EOF reading code\n"); - if (ferror(ifile)) printf("Error reading row_size\n"); - if (is_little_endian) - { - for (int i = 0; i < A->local_num_rows; i++) - { - 
endian_swap(&(row_sizes[i])); - nnz += row_sizes[i]; - } - } - else - { - for (int i = 0; i < A->local_num_rows; i++) - { - nnz += row_sizes[i]; - } - } - } - - // Find nnz per proc (to find first_nnz) - RAPtor_MPI_Allgather(&nnz, 1, RAPtor_MPI_INT, proc_nnz.data(), 1, RAPtor_MPI_INT, comm); - long first_nnz = 0; - for (int i = 0; i < rank; i++) - first_nnz += proc_nnz[i]; - long total_nnz = first_nnz; - for (int i = rank; i < num_procs; i++) - total_nnz += proc_nnz[i]; - - // Resize variables - if (nnz) - { - col_indices.resize(nnz); - vals.resize(nnz); - } - - // Read in col_indices - pos = (4 + A->global_num_rows + first_nnz) * sizeof_int32; - if (fseek(ifile, pos, SEEK_SET)) printf("Error seeking pos\n"); - n_items_read = fread(col_indices.data(), sizeof_int32, nnz, ifile); - if (n_items_read == EOF) printf("EOF reading code\n"); - if (ferror(ifile)) printf("Error reading col idx\n"); - - pos = (4 + A->global_num_rows + total_nnz) * sizeof_int32 + (first_nnz * sizeof_dbl); - if (fseek(ifile, pos, SEEK_SET)) printf("Error seeking pos\n"); - n_items_read = fread(vals.data(), sizeof_dbl, nnz, ifile); - if (n_items_read == EOF) printf("EOF reading code\n"); - if (ferror(ifile)) printf("Error reading value\n"); - - if (is_little_endian) - { - for (int i = 0; i < nnz; i++) - { - endian_swap(&(col_indices[i])); - endian_swap(&(vals[i])); - } - } - - fclose(ifile); - - A->on_proc->idx1[0] = 0; - A->off_proc->idx1[0] = 0; - ctr = 0; - for (int i = 0; i < A->local_num_rows; i++) - { - size = row_sizes[i]; - for (int j = 0; j < size; j++) - { - idx = col_indices[ctr]; - val = vals[ctr++]; - if ((int) idx >= A->partition->first_local_col && - (int) idx <= A->partition->last_local_col) - { - A->on_proc->idx2.emplace_back(idx - A->partition->first_local_col); - A->on_proc->vals.emplace_back(val); - } - else - { - A->off_proc->idx2.emplace_back(idx); - A->off_proc->vals.emplace_back(val); - } - } - A->on_proc->idx1[i+1] = A->on_proc->idx2.size(); - A->off_proc->idx1[i+1] = 
A->off_proc->idx2.size(); - } - A->on_proc->nnz = A->on_proc->idx2.size(); - A->off_proc->nnz = A->off_proc->idx2.size(); - - A->finalize(); - - return A; -} -} diff --git a/raptor/gallery/par_matrix_IO.hpp b/raptor/gallery/par_matrix_IO.hpp deleted file mode 100644 index a5ea6ef2..00000000 --- a/raptor/gallery/par_matrix_IO.hpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#ifndef PAR_MATRIX_IO_H -#define PAR_MATRIX_IO_H - -#include -#include -#include -#include -#include -#include // std::cout -#include // std::ifstream - -#include "raptor/core/par_matrix.hpp" -#include "raptor/core/types.hpp" - -namespace raptor { - -ParCSRMatrix* readParMatrix(const char* filename, - int local_num_rows = -1, int local_num_cols = -1, - int first_local_row = -1, int first_local_col = -1, - RAPtor_MPI_Comm comm = RAPtor_MPI_COMM_WORLD); - -} -#endif diff --git a/raptor/gallery/par_matrix_market.cpp b/raptor/gallery/par_matrix_market.cpp deleted file mode 100644 index eabcd962..00000000 --- a/raptor/gallery/par_matrix_market.cpp +++ /dev/null @@ -1,309 +0,0 @@ -/* -* Matrix Market I/O library for ANSI C -* -* See http://math.nist.gov/MatrixMarket for details. 
-* -* -*/ - -#include -#include -#include -#include - -#include "par_matrix_market.hpp" - -namespace raptor { - -// Declare Private Methods -void write_par_data(FILE* f, int n, int* rowptr, int* col_idx, - double* vals, int first_row, int* col_map); - -ParCSRMatrix* read_par_mm(const char *fname) -{ - FILE *f; - MM_typecode matcode; - int M, N, nz; - int i; - int row, col; - int n_items_read; - double val; - - if ((f = fopen(fname, "r")) == NULL) - return NULL; - - - if (mm_read_banner(f, &matcode) != 0) - { - printf("mm_read_unsymetric: Could not process Matrix Market banner "); - printf(" in file [%s]\n", fname); - return NULL; - } - - - if ( !(mm_is_real(matcode) && mm_is_matrix(matcode) && - mm_is_sparse(matcode))) - { - fprintf(stderr, "Sorry, this application does not support "); - fprintf(stderr, "Market Market type: [%s]\n", - mm_typecode_to_str(matcode)); - return NULL; - } - - /* find out size of sparse matrix: M, N, nz .... */ - - if (mm_read_mtx_crd_size(f, &M, &N, &nz) !=0) - { - fprintf(stderr, "read_unsymmetric_sparse(): could not parse matrix size.\n"); - return NULL; - } - - int row_nnz = nz / M; - ParCOOMatrix* A = new ParCOOMatrix(M, N); - A->on_proc->vals.reserve(row_nnz); - A->off_proc->vals.reserve(row_nnz); - - /* NOTE: when reading in doubles, ANSI C requires the use of the "l" */ - /* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */ - /* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 
136 lines 13-15) */ - - bool symmetric = mm_is_symmetric(matcode); - bool row_local; - bool col_local; - for (i=0; i= A->partition->first_local_row && row <= A->partition->last_local_row) - { - row_local = true; - row -= A->partition->first_local_row; - } - else - { - row_local = false; - if (!symmetric) - continue; - } - if (col >= A->partition->first_local_col && col <= A->partition->last_local_col) - { - col_local = true; - col -= A->partition->first_local_col; - } - else - { - col_local = false; - if (!row_local) - continue; - } - - if (row_local) - { - if (col_local) - { - A->on_proc->add_value(row, col, val); - } - else - { - A->off_proc->add_value(row, col, val); - } - } - - if (symmetric) - { - if (col_local) - { - if (row_local) - { - A->on_proc->add_value(col, row, val); - } - else - { - A->off_proc->add_value(col, row, val); - } - } - } - } - - A->finalize(); - ParCSRMatrix* A_csr = A->to_ParCSR(); - delete A; - - fclose(f); - - return A_csr; -} - -void write_par_data(FILE* f, int n, int* rowptr, int* col_idx, - double* vals, int first_row, int* col_map) -{ - int start, end, global_row; - - for (int i = 0; i < n; i++) - { - global_row = first_row + i; - start = rowptr[i]; - end = rowptr[i+1]; - for (int j = start; j < end; j++) - { - fprintf(f, "%d %d %2.15e\n", global_row + 1, - col_map[col_idx[j]] + 1, vals[j]); - } - } -} - - -void write_par_mm(ParCSRMatrix* A, const char *fname) -{ - int rank, num_procs; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - RAPtor_MPI_Comm_size(RAPtor_MPI_COMM_WORLD, &num_procs); - - FILE *f; - MM_typecode matcode; - int pos; - int int_bytes, double_bytes; - int num_ints, num_doubles; - int comm_size; - - std::vector buffer; - - int nnz = A->local_nnz; - int global_nnz; - RAPtor_MPI_Reduce(&nnz, &global_nnz, 1, RAPtor_MPI_INT, RAPtor_MPI_SUM, 0, RAPtor_MPI_COMM_WORLD); - - std::vector proc_dims(5*num_procs); - int dims[5]; - dims[0] = A->local_num_rows + 1; - dims[1] = A->on_proc_num_cols; - dims[2] = 
A->off_proc_num_cols; - dims[3] = A->on_proc->nnz; - dims[4] = A->off_proc->nnz; - RAPtor_MPI_Gather(dims, 5, RAPtor_MPI_INT, proc_dims.data(), 5, RAPtor_MPI_INT, 0, RAPtor_MPI_COMM_WORLD); - - if (rank == 0) // RANK 0 IS ONLY ONE WRITING TO FILE - { - f = fopen(fname, "w"); - - mm_initialize_typecode(&matcode); - mm_set_matrix(&matcode); - mm_set_coordinate(&matcode); - mm_set_real(&matcode); - - mm_write_banner(f, matcode); - fprintf(f, "%%\n"); - mm_write_mtx_crd_size(f, A->global_num_rows, A->global_num_cols, - global_nnz); - - // Write local data - int first_row = 0; - write_par_data(f, A->local_num_rows, A->on_proc->idx1.data(), - A->on_proc->idx2.data(), A->on_proc->vals.data(), - first_row, A->on_proc_column_map.data()); - write_par_data(f, A->local_num_rows, A->off_proc->idx1.data(), - A->off_proc->idx2.data(), A->off_proc->vals.data(), - first_row, A->off_proc_column_map.data()); - first_row += A->local_num_rows; - - // Write data from other processes - std::vector idx1; - std::vector idx2; - std::vector vals; - std::vector row_map; - std::vector col_map; - for (int i = 1; i < num_procs; i++) - { - // Calculate comm_size and allocate recv_buf - int* i_dims = &proc_dims[i*5]; - num_ints = i_dims[0] * 2 + i_dims[1] + i_dims[3] + i_dims[3] + i_dims[4]; - num_doubles = i_dims[3] + i_dims[4]; - RAPtor_MPI_Pack_size(num_ints, RAPtor_MPI_INT, RAPtor_MPI_COMM_WORLD, &int_bytes); - RAPtor_MPI_Pack_size(num_doubles, RAPtor_MPI_DOUBLE, RAPtor_MPI_COMM_WORLD, &double_bytes); - comm_size = int_bytes + double_bytes; - if ((int)buffer.size() < comm_size) buffer.resize(comm_size); - - // Resize Matrix Arrays - int row_max = i_dims[0]; - int col_max = i_dims[1]; - int nnz_max = i_dims[3]; - if (i_dims[2] > i_dims[1]) col_max = i_dims[2]; - if (i_dims[4] > i_dims[3]) nnz_max = i_dims[4]; - if ((int)col_map.size() < col_max) col_map.resize(col_max); - if ((int)idx1.size() < row_max) idx1.resize(row_max); - if ((int)idx2.size() < nnz_max) - { - idx2.resize(nnz_max); - 
vals.resize(nnz_max); - } - - // Recv Packed Buffer - RAPtor_MPI_Recv(buffer.data(), comm_size, RAPtor_MPI_PACKED, i, 1234, RAPtor_MPI_COMM_WORLD, - RAPtor_MPI_STATUS_IGNORE); - - // Unpack On Proc Data - pos = 0; - RAPtor_MPI_Unpack(buffer.data(), comm_size, &pos, col_map.data(), i_dims[1], - RAPtor_MPI_INT, RAPtor_MPI_COMM_WORLD); - RAPtor_MPI_Unpack(buffer.data(), comm_size, &pos, idx1.data(), i_dims[0], - RAPtor_MPI_INT, RAPtor_MPI_COMM_WORLD); - RAPtor_MPI_Unpack(buffer.data(), comm_size, &pos, idx2.data(), i_dims[3], - RAPtor_MPI_INT, RAPtor_MPI_COMM_WORLD); - RAPtor_MPI_Unpack(buffer.data(), comm_size, &pos, vals.data(), i_dims[3], - RAPtor_MPI_DOUBLE, RAPtor_MPI_COMM_WORLD); - write_par_data(f, i_dims[0] - 1, idx1.data(), idx2.data(), - vals.data(), first_row, col_map.data()); - - RAPtor_MPI_Unpack(buffer.data(), comm_size, &pos, col_map.data(), i_dims[2], - RAPtor_MPI_INT, RAPtor_MPI_COMM_WORLD); - RAPtor_MPI_Unpack(buffer.data(), comm_size, &pos, idx1.data(), i_dims[0], - RAPtor_MPI_INT, RAPtor_MPI_COMM_WORLD); - RAPtor_MPI_Unpack(buffer.data(), comm_size, &pos, idx2.data(), i_dims[4], - RAPtor_MPI_INT, RAPtor_MPI_COMM_WORLD); - RAPtor_MPI_Unpack(buffer.data(), comm_size, &pos, vals.data(), i_dims[4], - RAPtor_MPI_DOUBLE, RAPtor_MPI_COMM_WORLD); - write_par_data(f, i_dims[0] - 1, idx1.data(), idx2.data(), - vals.data(), first_row, col_map.data()); - - first_row += i_dims[0] - 1; - } - - fclose(f); - } - else // All processes that are not 0, send to 0 - { - // Determine send size (in bytes) - num_ints = dims[0] * 2 + dims[1] + dims[3] + dims[3] + dims[4]; - num_doubles = dims[3] + dims[4]; - RAPtor_MPI_Pack_size(num_ints, RAPtor_MPI_INT, RAPtor_MPI_COMM_WORLD, &int_bytes); - RAPtor_MPI_Pack_size(num_doubles, RAPtor_MPI_DOUBLE, RAPtor_MPI_COMM_WORLD, &double_bytes); - comm_size = int_bytes + double_bytes; - buffer.resize(comm_size); - - // Pack Data - pos = 0; - RAPtor_MPI_Pack(A->on_proc_column_map.data(), dims[1], RAPtor_MPI_INT, buffer.data(), comm_size, 
- &pos, RAPtor_MPI_COMM_WORLD); - RAPtor_MPI_Pack(A->on_proc->idx1.data(), dims[0], RAPtor_MPI_INT, buffer.data(), comm_size, - &pos, RAPtor_MPI_COMM_WORLD); - RAPtor_MPI_Pack(A->on_proc->idx2.data(), dims[3], RAPtor_MPI_INT, buffer.data(), comm_size, - &pos, RAPtor_MPI_COMM_WORLD); - RAPtor_MPI_Pack(A->on_proc->vals.data(), dims[3], RAPtor_MPI_DOUBLE, buffer.data(), comm_size, - &pos, RAPtor_MPI_COMM_WORLD); - - RAPtor_MPI_Pack(A->off_proc_column_map.data(), dims[2], RAPtor_MPI_INT, buffer.data(), comm_size, - &pos, RAPtor_MPI_COMM_WORLD); - RAPtor_MPI_Pack(A->off_proc->idx1.data(), dims[0], RAPtor_MPI_INT, buffer.data(), comm_size, - &pos, RAPtor_MPI_COMM_WORLD); - RAPtor_MPI_Pack(A->off_proc->idx2.data(), dims[4], RAPtor_MPI_INT, buffer.data(), comm_size, - &pos, RAPtor_MPI_COMM_WORLD); - RAPtor_MPI_Pack(A->off_proc->vals.data(), dims[4], RAPtor_MPI_DOUBLE, buffer.data(), comm_size, - &pos, RAPtor_MPI_COMM_WORLD); - - // Send Packed Data - RAPtor_MPI_Send(buffer.data(), comm_size, RAPtor_MPI_PACKED, 0, 1234, RAPtor_MPI_COMM_WORLD); - } -} - -} diff --git a/raptor/gallery/par_matrix_market.hpp b/raptor/gallery/par_matrix_market.hpp deleted file mode 100644 index 5a7af8d5..00000000 --- a/raptor/gallery/par_matrix_market.hpp +++ /dev/null @@ -1,23 +0,0 @@ -/* -* Matrix Market I/O library for ANSI C -* -* See http://math.nist.gov/MatrixMarket for details. 
-* -* -*/ - -#ifndef PAR_MM_IO_H -#define PAR_MM_IO_H - -#include "matrix_market.hpp" -#include "raptor/core/types.hpp" -#include "raptor/core/par_matrix.hpp" - -namespace raptor { - -/* high level routines */ -ParCSRMatrix* read_par_mm(const char *fname); -void write_par_mm(ParCSRMatrix* A, const char *fname); -} - -#endif diff --git a/raptor/gallery/par_random.cpp b/raptor/gallery/par_random.cpp deleted file mode 100644 index bb49d5ce..00000000 --- a/raptor/gallery/par_random.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#include "par_random.hpp" - -namespace raptor { -ParCSRMatrix* par_random(int global_rows, int global_cols, int nnz_per_row) -{ - int rank, num_procs; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - RAPtor_MPI_Comm_size(RAPtor_MPI_COMM_WORLD, &num_procs); - - ParCOOMatrix* A_coo; - double val = 1.0; - - A_coo = new ParCOOMatrix(global_rows, global_cols); - int local_nnz = nnz_per_row * A_coo->local_num_rows; - for (int i = 0; i < local_nnz; i++) - { - A_coo->add_value(rand() % A_coo->local_num_rows, rand() % global_cols, val); - } - A_coo->finalize(); - - ParCSRMatrix* A = A_coo->to_ParCSR(); - delete A_coo; - - return A; - -} - -} diff --git a/raptor/gallery/par_random.hpp b/raptor/gallery/par_random.hpp deleted file mode 100644 index 1a93a128..00000000 --- a/raptor/gallery/par_random.hpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#ifndef RAPTOR_GALLERY_PARRANDOM_HPP -#define RAPTOR_GALLERY_PARRANDOM_HPP - -#include -#include -#include -#include - -#include "raptor/core/par_matrix.hpp" -#include "raptor/core/types.hpp" - -namespace raptor { - -ParCSRMatrix* par_random(int global_rows, int global_cols, int nnz_per_row); -} -#endif diff --git a/raptor/gallery/par_stencil.cpp b/raptor/gallery/par_stencil.cpp deleted 
file mode 100644 index ab18f3a3..00000000 --- a/raptor/gallery/par_stencil.cpp +++ /dev/null @@ -1,228 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#include "par_stencil.hpp" - -namespace raptor { -ParCSRMatrix* par_stencil_grid(data_t* stencil, int* grid, int dim) -{ - // Get MPI Information - int rank, num_procs; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - RAPtor_MPI_Comm_size(RAPtor_MPI_COMM_WORLD, &num_procs); - - std::vector diags; - std::vector nonzero_stencil; - std::vector strides(dim); - std::vector data; - std::vector stack_indices; - - int stencil_len, ctr; - int N_v; // Number of rows (and cols) in matrix - int N_s; // Number of nonzero stencil entries - int n_v; // Local number of rows (and cols) - - int init_step, idx; - int len, step, current_step; - int col; - double value; - - // Initialize variables - stencil_len = (index_t)pow(3, dim); // stencil - 3 ^ dim - - //N_v is global number of rows - N_v = 1; - for (index_t i = 0; i < dim; i++) - { - N_v *= grid[i]; - } - - //N_s is number of nonzero stencil entries - N_s = 0; - for (index_t i = 0; i < stencil_len; i++) - { - if (fabs(stencil[i]) > zero_tol) - { - N_s++; - } - } - - ParCSRMatrix* A = new ParCSRMatrix(N_v, N_v); - - n_v = A->partition->local_num_rows; - int first_local_row = A->partition->first_local_row; - int last_local_row = first_local_row + n_v - 1; - - A->on_proc->n_rows = n_v; - A->on_proc->n_cols = n_v; - A->on_proc->nnz = 0; - A->on_proc->idx1.resize(n_v+1); - A->on_proc->idx2.reserve(n_v*stencil_len); - A->on_proc->vals.reserve(n_v*stencil_len); - - A->off_proc->n_rows = n_v; - A->off_proc->n_cols = N_v; - A->off_proc->nnz = 0; - A->off_proc->idx1.resize(n_v+1); - A->off_proc->idx2.reserve(0.3*n_v*stencil_len); - A->off_proc->vals.reserve(0.3*n_v*stencil_len); - - - diags.resize(N_s, 0); - nonzero_stencil.resize(N_s); - strides.resize(dim); - //Calculate strides for index offset 
for each dof in stencil - strides[0] = 1; - for (index_t i = 0; i < dim-1; i++) - { - strides[i+1] = grid[dim-i-1] * strides[i]; - } - - //Calculate indices of nonzeros in stencil - index_t indices[N_s][dim]; - ctr = 0; - for (index_t i = 0; i < stencil_len; i++) - { - if (fabs(stencil[i]) > zero_tol) - { - for (index_t j = 0; j < dim; j++) - { - //index_t power = pow(3, j); - index_t idiv = i / pow(3, j); - indices[ctr][dim-j-1] = (idiv % 3) - (3 / 2); - } - nonzero_stencil[ctr] = stencil[i]; - ctr++; - } - } - - //Add strides to diags - for (index_t i = 0; i < dim; i++) - { - for (index_t j = 0; j < N_s; j++) - { - diags[j] += strides[i] * indices[j][dim-i-1]; - } - } - - //Initial data array - data.resize(N_s*n_v); - for (index_t i = 0; i < N_s; i++) - { - for (index_t j = 0; j < n_v; j++) - { - data[i*n_v + j] = nonzero_stencil[i]; - } - } - - //Vertically stack indices (reorder) - stack_indices.resize(N_s*dim); - for (index_t i = 0; i < N_s; i++) - { - for (index_t j = 0; j < dim; j++) - { - stack_indices[i*dim+j] = indices[i][j]; - } - } - - //Zero boundary conditions - for (index_t i = 0; i < N_s; i++) - { - //get correct chunk of data - //(corresponding to single stencil entry) - init_step = i*n_v; - for (index_t j = 0; j < dim; j++) - { - //If main diagonal, no boundary conditions - idx = stack_indices[i*dim + j]; - if (idx == 0) - { - continue; - } - - //Calculate length of chunks that are to - // be set to zero, and step size between - // these blocks of data - len = 1; - step = 1; - for (index_t k = 0; k < (dim-j-1); k++) - { - len *= grid[k]; - } - step = len * grid[0]; - - //zeros at beginning - if (idx > 0) - { - current_step = step * (first_local_row / step); - - //If previous boundary lies on processor - for (index_t k = current_step; k < last_local_row+1; k+=step) - { - for (index_t l = 0; l < len; l++) - { - if (k+l > last_local_row) - { - break; - } - if (k+l < first_local_row) - { - continue; - } - data[init_step + (k-first_local_row) + l] = 0; 
- } - } - } - - //zeros at end - else if (idx < 0) - { - current_step = step*(((last_local_row-1)/step)+1); - - //If previous boundary lies on processor - for (index_t k = current_step; k > first_local_row; k-=step) - { - for (index_t l = 0; l < len; l++) - { - if (k - l - 1 < first_local_row) - { - break; - } - else if (k - l - 1 > last_local_row) - { - continue; - } - data[init_step + (k-l-first_local_row) -1] = 0; - } - } - } - } - } - - //Add diagonals to ParMatrix A - A->on_proc->idx1[0] = 0; - A->off_proc->idx1[0] = 0; - for (index_t i = 0; i < n_v; i++) - { - for (index_t d = 0; d < N_s; d++) - { - //add data[i] if nonzero - col = diags[d] + i + first_local_row; - value = data[(N_s-d-1)*n_v+i]; - if (col >= 0 && col < N_v && fabs(value) > zero_tol) - { - A->add_value(i, col, value); - } - } - A->on_proc->idx1[i+1] = A->on_proc->idx2.size(); - A->off_proc->idx1[i+1] = A->off_proc->idx2.size(); - } - - A->on_proc->nnz = A->on_proc->idx2.size(); - A->off_proc->nnz = A->off_proc->idx2.size(); - - A->finalize(); - - return A; -} - -} diff --git a/raptor/gallery/par_stencil.hpp b/raptor/gallery/par_stencil.hpp deleted file mode 100644 index 407734b4..00000000 --- a/raptor/gallery/par_stencil.hpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#ifndef PARSTENCIL_HPP -#define PARSTENCIL_HPP - -#include -#include -#include - -#include "raptor/core/types.hpp" -#include "raptor/core/par_matrix.hpp" - -namespace raptor { - -ParCSRMatrix* par_stencil_grid(data_t* stencil, int* grid, int dim); - -} -#endif diff --git a/raptor/gallery/random.cpp b/raptor/gallery/random.cpp deleted file mode 100644 index 878ce332..00000000 --- a/raptor/gallery/random.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "random.hpp" - -namespace raptor { -CSRMatrix* 
random(int rows, int cols, int nnz_per_row) -{ - CSRMatrix* A; - COOMatrix* Atmp = new COOMatrix(rows, cols, nnz_per_row); - - int nnz = nnz_per_row * rows; - for (int i = 0; i < nnz; i++) - { - Atmp->idx1.emplace_back(rand() % rows); - Atmp->idx2.emplace_back(rand() % cols); - Atmp->vals.emplace_back(1.0); - } - Atmp->nnz = nnz; - - A = Atmp->to_CSR(); - delete Atmp; - - return A; - -} - -} diff --git a/raptor/gallery/random.hpp b/raptor/gallery/random.hpp deleted file mode 100644 index 166a150f..00000000 --- a/raptor/gallery/random.hpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#ifndef RAPTOR_GALLERY_RANDOM_HPP -#define RAPTOR_GALLERY_RANDOM_HPP - -#include -#include -#include -#include - -#include "raptor/core/matrix.hpp" -#include "raptor/core/types.hpp" - -namespace raptor { - -CSRMatrix* random(int rows, int cols, int nnz_per_row); -} -#endif diff --git a/raptor/gallery/stencil.cpp b/raptor/gallery/stencil.cpp deleted file mode 100644 index 863ec797..00000000 --- a/raptor/gallery/stencil.cpp +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "stencil.hpp" - -namespace raptor { -// Stencils are symmetric, so A could be CSR or CSC -CSRMatrix* stencil_grid(data_t* stencil, int* grid, int dim) -{ - std::vector diags; - std::vector nonzero_stencil; - std::vector strides(dim); - std::vector data; - std::vector stack_indices; - - int stencil_len, ctr; - int N_v; // Number of rows (and cols) in matrix - int N_s; // Number of nonzero stencil entries - int init_step, idx; - int len, step; - int col; - double value; - - stencil_len = (int)pow(3, dim); - - N_v = 1; - for (int i = 0; i < dim; i++) - { - N_v *= grid[i]; - } - - N_s = 0; - for (int i = 0; i < stencil_len; i++) - { - if (fabs(stencil[i]) > zero_tol) - { - N_s++; - } - } - - // Set 
dimensions of A - CSRMatrix* A = new CSRMatrix(N_v, N_v); - - diags.resize(N_s, 0); - nonzero_stencil.resize(N_s); - strides[0] = 1; - for (int i = 0; i < dim - 1; i++) - { - strides[i+1] = grid[dim-i-1] * strides[i]; - } - - // Calculate indices of nonzeros in stencil - int indices[N_s][dim]; - ctr = 0; - for (int i = 0; i < stencil_len; i++) - { - if (fabs(stencil[i]) > zero_tol) - { - for (int j = 0; j < dim; j++) - { - //int power = pow(3, j); - int idiv = i / pow(3, j); - indices[ctr][dim-j-1] = (idiv % 3) - (3 / 2); - } - nonzero_stencil[ctr] = stencil[i]; - ctr++; - } - } - - // Add strides to diags - for (int i = 0; i < dim; i++) - { - for (int j = 0; j < N_s; j++) - { - diags[j] += strides[i] * indices[j][dim-i-1]; - } - } - - // Initial data array - data.resize(N_s*N_v); - for (int i = 0; i < N_s; i++) - { - for (int j = 0; j < N_v; j++) - { - data[i*N_v + j] = nonzero_stencil[i]; - } - } - - // Vertically stack indices (reorder) - stack_indices.resize(N_s*dim); - for (int i = 0; i < N_s; i++) - { - for (int j = 0; j < dim; j++) - { - stack_indices[i*dim+j] = indices[i][j]; - } - } - - - //Zero boundary conditions - for (int i = 0; i < N_s; i++) - { - //get correct chunk of data - //(corresponding to single stencil entry) - init_step = i*N_v; - for (int j = 0; j < dim; j++) - { - //If main diagonal, no boundary conditions - idx = stack_indices[i*dim + j]; - if (idx == 0) - { - continue; - } - - //Calculate length of chunks that are to - // be set to zero, and step size between - // these blocks of data - len = 1; - step = 1; - for (int k = 0; k < (dim-j-1); k++) - { - len *= grid[k]; - } - step = len * grid[0]; - - //zeros at beginning - if (idx > 0) - { - //If previous boundary lies on processor - for (int k = 0; k < N_v; k+=step) - { - for (int l = 0; l < len; l++) - { - if (k+l > N_v) - { - break; - } - if (k+l < 0) - { - continue; - } - data[init_step + (k-0) + l] = 0; - } - } - } - - //zeros at end - else if (idx < 0) - { - //If previous boundary 
lies on processor - for (int k = N_v; k > 0; k-=step) - { - for (int l = 0; l < len; l++) - { - if (k - l - 1 < 0) - { - break; - } - else if (k - l - 1 > N_v) - { - continue; - } - data[init_step + (k-l-0) -1] = 0; - } - } - } - } - } - - //Add diagonals to ParMatrix A - A->idx2.reserve(N_s*N_v); - A->vals.reserve(N_s*N_v); - - A->idx1[0] = 0; - for (int i = 0; i < N_v; i++) - { - for (int d = 0; d < N_s; d++) - { - //add data[i] if nonzero - col = diags[d] + i; - value = data[(N_s-d-1)*N_v+i]; - if (col >= 0 && col < N_v && fabs(value) > zero_tol) - //if (fabs(value) > zero_tol) - { - A->idx2.emplace_back(col); - A->vals.emplace_back(value); - } - } - A->idx1[i+1] = A->idx2.size(); - } - A->nnz = A->idx2.size(); - - return A; -} - -} diff --git a/raptor/gallery/stencil.hpp b/raptor/gallery/stencil.hpp deleted file mode 100644 index 4f0c643b..00000000 --- a/raptor/gallery/stencil.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#ifndef STENCIL_HPP -#define STENCIL_HPP - -#include -#include -#include - -#include "raptor/core/types.hpp" -#include "raptor/core/matrix.hpp" - -namespace raptor { - -// Stencils are symmetric, so A could be CSR or CSC -CSRMatrix* stencil_grid(data_t* stencil, int* grid, int dim); -} -#endif - diff --git a/raptor/gallery/tests/CMakeLists.txt b/raptor/gallery/tests/CMakeLists.txt deleted file mode 100644 index 57719cba..00000000 --- a/raptor/gallery/tests/CMakeLists.txt +++ /dev/null @@ -1,34 +0,0 @@ -add_executable(test_stencil test_stencil.cpp) -target_link_libraries(test_stencil raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(StencilTest ./test_stencil) - -add_executable(test_laplacian test_laplacian.cpp) -target_link_libraries(test_laplacian raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(LaplacianTest ./test_laplacian) - -add_executable(test_aniso test_aniso.cpp) -target_link_libraries(test_aniso raptor 
${MPI_LIBRARIES} googletest pthread ) -add_test(AnisoTest ./test_aniso) - -add_executable(test_matrix_market test_matrix_market.cpp) -target_link_libraries(test_matrix_market raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(MatrixMarketTest ./test_matrix_market) - - -if (WITH_MPI) - add_executable(test_par_laplacian test_par_laplacian.cpp) - target_link_libraries(test_par_laplacian raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(ParLaplacianTest ${MPIRUN} -n 1 ${HOST} ./test_par_laplacian) - add_test(ParLaplacianTest ${MPIRUN} -n 2 ${HOST} ./test_par_laplacian) - - add_executable(test_par_aniso test_par_aniso.cpp) - target_link_libraries(test_par_aniso raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(ParAnisoTest ${MPIRUN} -n 1 ${HOST} ./test_par_aniso) - add_test(ParAnisoTest ${MPIRUN} -n 2 ${HOST} ./test_par_aniso) - - add_executable(test_par_matrix_market test_par_matrix_market.cpp) - target_link_libraries(test_par_matrix_market raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(ParMatrixMarketTest ${MPIRUN} -n 1 ${HOST} ./test_par_matrix_market) - add_test(ParMatrixMarketTest ${MPIRUN} -n 2 ${HOST} ./test_par_matrix_market) -endif() - diff --git a/raptor/gallery/tests/test_aniso.cpp b/raptor/gallery/tests/test_aniso.cpp deleted file mode 100644 index 19223e0b..00000000 --- a/raptor/gallery/tests/test_aniso.cpp +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor/raptor.hpp" -using namespace raptor; - -int main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} // end of main() // - -TEST(AnisoTest, TestsInGallery) -{ - - - int start, end; - - int grid[2] = {25, 25}; - double eps = 0.001; - double theta = M_PI/8.0; - double* stencil = diffusion_stencil_2d(eps, theta); - CSRMatrix* A_sten = stencil_grid(stencil, grid, 2); - CSRMatrix* A_io = 
readMatrix("../../../../test_data/aniso.pm"); - - // Compare shapes - ASSERT_EQ(A_io->n_rows, A_sten->n_rows); - ASSERT_EQ(A_io->n_cols, A_sten->n_cols); - - A_sten->sort(); - //A_sten->remove_duplicates(); - - A_io->sort(); - //A_io->remove_duplicates(); - - ASSERT_EQ(A_sten->idx1[0], A_io->idx1[0]); - for (int i = 0; i < A_io->n_rows; i++) - { - // Check correct row_ptrs - ASSERT_EQ(A_sten->idx1[i+1], A_io->idx1[i+1]); - start = A_sten->idx1[i]; - end = A_sten->idx1[i+1]; - - // Check correct col indices / values - - for (int j = start; j < end; j++) - { - ASSERT_EQ(A_sten->idx2[j], A_io->idx2[j]); - //ASSERT_NEAR(A_sten->vals[j], A_io->vals[j], 1e-12); - } - } - - delete A_io; - delete[] stencil; - delete A_sten; -} // end of TEST(AnisoTest, TestsInGallery) // - diff --git a/raptor/gallery/tests/test_laplacian.cpp b/raptor/gallery/tests/test_laplacian.cpp deleted file mode 100644 index adbe094f..00000000 --- a/raptor/gallery/tests/test_laplacian.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor/raptor.hpp" - -using namespace raptor; - -int main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} // end of main() // - -TEST(LaplacianTest, TestsInGallery) -{ - int start, end; - - int grid[3] = {10, 10, 10}; - double* stencil = laplace_stencil_27pt(); - CSRMatrix* A_sten = stencil_grid(stencil, grid, 3); - - const char* mat_fn = "../../../../test_data/laplacian.pm"; - CSRMatrix* A_io = readMatrix(mat_fn); - - // Compare shapes - ASSERT_EQ(A_io->n_rows, A_sten->n_rows); - ASSERT_EQ(A_io->n_cols, A_sten->n_cols); - - A_sten->sort(); - A_io->sort(); - - ASSERT_EQ(A_sten->idx1[0], A_io->idx1[0]); - - for (int i = 0; i < A_io->n_rows; i++) - { - // Check correct row_ptrs - ASSERT_EQ(A_sten->idx1[i+1], A_io->idx1[i+1]); - start = A_sten->idx1[i]; - end = A_sten->idx1[i+1]; - 
- // Check correct col indices / values - for (int j = start; j < end; j++) - { - ASSERT_EQ(A_sten->idx2[j], A_io->idx2[j]); - ASSERT_NEAR(A_sten->vals[j], A_io->vals[j], zero_tol); - } - } - - delete[] stencil; - delete A_sten; - delete A_io; -} // end of TEST(LaplacianTest, TestsInGallery) // - diff --git a/raptor/gallery/tests/test_matrix_market.cpp b/raptor/gallery/tests/test_matrix_market.cpp deleted file mode 100644 index 2efa6bde..00000000 --- a/raptor/gallery/tests/test_matrix_market.cpp +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor/raptor.hpp" -#include "raptor/tests/compare.hpp" - -using namespace raptor; - -int main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} // end of main() // - -TEST(AnisoTest, TestsInGallery) -{ - const char* f_in = "../../../../test_data/sas_P0.mtx"; - const char* f_out = "../../../../test_data/sas_P0_out.mtx"; - CSRMatrix* Amm = read_mm(f_in); - write_mm(Amm, f_out); - CSRMatrix* Amm_out = read_mm(f_out); - compare(Amm, Amm_out); - - // Diff the two mtx files - std::string command = "diff "; - command += f_in; - command += " "; - command += f_out; - int err = system(command.c_str()); - ASSERT_EQ(err, 0); - - - remove(f_out); - - delete Amm; -} // end of TEST(AnisoTest, TestsInGallery) // - - diff --git a/raptor/gallery/tests/test_par_aniso.cpp b/raptor/gallery/tests/test_par_aniso.cpp deleted file mode 100644 index 7ef40dc6..00000000 --- a/raptor/gallery/tests/test_par_aniso.cpp +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor/raptor.hpp" - -using namespace raptor; - -int main(int argc, char** argv) -{ - MPI_Init(&argc, &argv); - ::testing::InitGoogleTest(&argc, argv); - int 
temp = RUN_ALL_TESTS(); - MPI_Finalize(); - return temp; -} // end of main() // - -TEST(ParAnisoTest, TestsInGallery) -{ - int rank, num_procs; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &num_procs); - - const char* A0_fn = "../../../../test_data/aniso.pm"; - - - int start, end; - int grid[2] = {25, 25}; - double eps = 0.001; - double theta = M_PI/8.0; - double* stencil = diffusion_stencil_2d(eps, theta); - ParCSRMatrix* A_sten = par_stencil_grid(stencil, grid, 2); - ParCSRMatrix* A_io = readParMatrix(A0_fn); - - // Compare shapes - ASSERT_EQ(A_io->global_num_rows, A_sten->global_num_rows); - ASSERT_EQ(A_io->global_num_cols, A_sten->global_num_cols); - - ASSERT_EQ(A_sten->local_num_rows, A_io->local_num_rows); - ASSERT_EQ(A_sten->on_proc_num_cols, A_io->on_proc_num_cols); - ASSERT_EQ(A_sten->partition->first_local_row, A_io->partition->first_local_row); - ASSERT_EQ(A_sten->partition->last_local_row, A_io->partition->last_local_row); - ASSERT_EQ(A_sten->partition->first_local_col, A_io->partition->first_local_col); - ASSERT_EQ(A_sten->partition->last_local_col, A_io->partition->last_local_col); - - std::vector global_col_starts(num_procs+1); - std::vector global_row_starts(num_procs+1); - MPI_Allgather(&A_sten->partition->first_local_row, 1, MPI_INT, &global_row_starts[0], - 1, MPI_INT, MPI_COMM_WORLD); - MPI_Allgather(&A_sten->partition->first_local_col, 1, MPI_INT, &global_col_starts[0], - 1, MPI_INT, MPI_COMM_WORLD); - global_row_starts[num_procs] = A_sten->global_num_rows; - global_col_starts[num_procs] = A_sten->global_num_cols; - - ASSERT_EQ(A_sten->local_num_rows, (global_row_starts[rank+1] - global_row_starts[rank])); - ASSERT_EQ(A_sten->on_proc_num_cols, (global_col_starts[rank+1] - global_col_starts[rank])); - - if (A_sten->local_num_rows) - { - ASSERT_EQ(A_sten->partition->last_local_row, (global_col_starts[rank+1] - 1)); - } - if (A_sten->on_proc_num_cols) - { - ASSERT_EQ(A_sten->partition->last_local_col, 
(global_col_starts[rank+1] - 1)); - } - - A_sten->sort(); - A_io->sort(); - - ASSERT_EQ(A_sten->on_proc->idx1[0], A_io->on_proc->idx1[0]); - ASSERT_EQ(A_sten->off_proc->idx1[0], A_io->off_proc->idx1[0]); - - for (int i = 0; i < A_sten->local_num_rows; i++) - { - ASSERT_EQ(A_sten->on_proc->idx1[i+1], A_io->on_proc->idx1[i+1]); - start = A_sten->on_proc->idx1[i]; - end = A_sten->on_proc->idx1[i+1]; - - for (int j = start; j < end; j++) - { - ASSERT_EQ(A_sten->on_proc->idx2[j], A_io->on_proc->idx2[j]); - ASSERT_NEAR(A_sten->on_proc->vals[j], A_io->on_proc->vals[j], 1e-05); - } - - ASSERT_EQ(A_sten->off_proc->idx1[i+1], A_io->off_proc->idx1[i+1]); - start = A_sten->off_proc->idx1[i]; - end = A_sten->off_proc->idx1[i+1]; - for (int j = start; j < end; j++) - { - ASSERT_EQ(A_sten->off_proc->idx2[j], A_io->off_proc->idx2[j]); - ASSERT_NEAR(A_sten->off_proc->vals[j], A_io->off_proc->vals[j], 1e-05); - } - } - - delete A_io; - delete A_sten; - delete[] stencil; -} // end of TEST(ParAnisoTest, TestsInGallery) // - diff --git a/raptor/gallery/tests/test_par_laplacian.cpp b/raptor/gallery/tests/test_par_laplacian.cpp deleted file mode 100644 index 398a103f..00000000 --- a/raptor/gallery/tests/test_par_laplacian.cpp +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor/raptor.hpp" - -using namespace raptor; - -int main(int argc, char** argv) -{ - MPI_Init(&argc, &argv); - ::testing::InitGoogleTest(&argc, argv); - int temp = RUN_ALL_TESTS(); - MPI_Finalize(); - return temp; -} // end of main() // - -TEST(ParLaplacianTest, TestsInGallery) -{ - int rank, num_procs; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &num_procs); - - - - int start, end; - int grid[3] = {10, 10, 10}; - double* stencil = laplace_stencil_27pt(); - ParCSRMatrix* A_sten = par_stencil_grid(stencil, grid, 3); - - ParCSRMatrix* A_io = 
readParMatrix("../../../../test_data/laplacian27.pm"); - - // Compare shapes - ASSERT_EQ(A_io->global_num_rows, A_sten->global_num_rows); - ASSERT_EQ(A_io->global_num_rows, A_sten->global_num_cols); - - ASSERT_EQ(A_sten->local_num_rows, A_io->local_num_rows); - ASSERT_EQ(A_sten->on_proc_num_cols, A_io->on_proc_num_cols); - ASSERT_EQ(A_sten->partition->first_local_row, A_io->partition->first_local_row); - ASSERT_EQ(A_sten->partition->last_local_row, A_io->partition->last_local_row); - ASSERT_EQ(A_sten->partition->first_local_col, A_io->partition->first_local_col); - ASSERT_EQ(A_sten->partition->last_local_col, A_io->partition->last_local_col); - - std::vector global_col_starts(num_procs+1); - std::vector global_row_starts(num_procs+1); - MPI_Allgather(&A_sten->partition->first_local_row, 1, MPI_INT, &global_row_starts[0], - 1, MPI_INT, MPI_COMM_WORLD); - MPI_Allgather(&A_sten->partition->first_local_col, 1, MPI_INT, &global_col_starts[0], - 1, MPI_INT, MPI_COMM_WORLD); - global_row_starts[num_procs] = A_sten->global_num_rows; - global_col_starts[num_procs] = A_sten->global_num_cols; - - ASSERT_EQ( A_sten->local_num_rows, (global_row_starts[rank+1] - global_row_starts[rank])); - ASSERT_EQ( A_sten->on_proc_num_cols, (global_col_starts[rank+1] - global_col_starts[rank])); - - if (A_sten->local_num_rows) - { - ASSERT_EQ(A_sten->partition->last_local_row, (global_col_starts[rank+1] - 1)); - } - if (A_sten->on_proc_num_cols) - { - ASSERT_EQ(A_sten->partition->last_local_col, (global_col_starts[rank+1] - 1)); - } - - A_sten->sort(); - A_io->sort(); - - ASSERT_EQ(A_sten->on_proc->idx1[0], A_io->on_proc->idx1[0]); - ASSERT_EQ(A_sten->off_proc->idx1[0],A_io->off_proc->idx1[0]); - - for (int i = 0; i < A_sten->local_num_rows; i++) - { - ASSERT_EQ(A_sten->on_proc->idx1[i+1], A_io->on_proc->idx1[i+1]); - start = A_sten->on_proc->idx1[i]; - end = A_sten->on_proc->idx1[i+1]; - - for (int j = start; j < end; j++) - { - ASSERT_EQ(A_sten->on_proc->idx2[j], A_io->on_proc->idx2[j]); - 
ASSERT_NEAR(A_sten->on_proc->vals[j], A_io->on_proc->vals[j], 1e-05); - } - - ASSERT_EQ(A_sten->off_proc->idx1[i+1], A_io->off_proc->idx1[i+1]); - start = A_sten->off_proc->idx1[i]; - end = A_sten->off_proc->idx1[i+1]; - for (int j = start; j < end; j++) - { - ASSERT_EQ(A_sten->off_proc->idx2[j], A_io->off_proc->idx2[j]); - ASSERT_NEAR(A_sten->off_proc->vals[j], A_io->off_proc->vals[j], 1e-05); - } - } - - delete A_io; - delete A_sten; - delete[] stencil; - -} // end of TEST(ParLaplacianTest, TestsInGallery) // - diff --git a/raptor/gallery/tests/test_par_matrix_market.cpp b/raptor/gallery/tests/test_par_matrix_market.cpp deleted file mode 100644 index 000678c8..00000000 --- a/raptor/gallery/tests/test_par_matrix_market.cpp +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor/raptor.hpp" -#include "raptor/tests/par_compare.hpp" - -using namespace raptor; - -int main(int argc, char** argv) -{ - MPI_Init(&argc, &argv); - ::testing::InitGoogleTest(&argc, argv); - int temp = RUN_ALL_TESTS(); - MPI_Finalize(); - return temp; -} // end of main() // - -TEST(ParAnisoTest, TestsInGallery) -{ - int rank, num_procs; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &num_procs); - - const char* f_in = "../../../../test_data/sas_P0.mtx"; - const char* f_out = "../../../../test_data/sas_P0_out.mtx"; - - ParCSRMatrix* Amm = read_par_mm(f_in); - - MPI_Barrier(MPI_COMM_WORLD); - write_par_mm(Amm, f_out); - - MPI_Barrier(MPI_COMM_WORLD); - ParCSRMatrix* Amm_out = read_par_mm(f_out); - - MPI_Barrier(MPI_COMM_WORLD); - compare(Amm, Amm_out); - - // Diff the two mtx files - if (rank == 0) - { - remove(f_out); - } - - delete Amm_out; - delete Amm; - - } // end of TEST(ParAnisoTest, TestsInGallery) // - - diff --git a/raptor/gallery/tests/test_stencil.cpp b/raptor/gallery/tests/test_stencil.cpp deleted file mode 100644 
index ebe62ae1..00000000 --- a/raptor/gallery/tests/test_stencil.cpp +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "gtest/gtest.h" -#include "raptor/raptor.hpp" - -using namespace raptor; - -int main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} // end of main() // - -TEST(StencilTest, TestsInGallery) -{ - // Create A from diffusion stencil - int dim = 2; - std::vector grid(2, 4); - double eps = 0.001; - double theta = M_PI / 8.0; - double* stencil = diffusion_stencil_2d(eps, theta); - CSRMatrix* A = stencil_grid(stencil, grid.data(), dim); - delete[] stencil; - - std::vector A_python(16 * 16, 0); - std::vector A_dense(16 * 16, 0); - - // Add values of A_python - int rows[100] = {0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, - 9, 9, 9, 9, 9, 9, 9, 9, 9, - 10, 10, 10, 10, 10, 10, 10, 10, 10, - 11, 11, 11, 11, 11, 11, - 12, 12, 12, 12, - 13, 13, 13, 13, 13, 13, - 14, 14, 14, 14, 14, 14, - 15, 15, 15, 15}; - - int cols[100] = {0, 1, 4, 5, - 0, 1, 2, 4, 5, 6, - 1, 2, 3, 5, 6, 7, - 2, 3, 6, 7, - 0, 1, 4, 5, 8, 9, - 0, 1, 2, 4, 5, 6, 8, 9, 10, - 1, 2, 3, 5, 6, 7, 9, 10, 11, - 2, 3, 6, 7, 10, 11, - 4, 5, 8, 9, 12, 13, - 4, 5, 6, 8, 9, 10, 12, 13, 14, - 5, 6, 7, 9, 10, 11, 13, 14, 15, - 6, 7, 10, 11, 14, 15, - 8, 9, 12, 13, - 8, 9, 10, 12, 13, 14, - 9, 10, 11, 13, 14, 15, - 10, 11, 14, 15}; - - double data[100] = {1.33466666667, 0.186366503869, -0.520033170536, -0.343433251935, 0.186366503869, 1.33466666667, 0.186366503869, 0.00976658526801, -0.520033170536, -0.343433251935, 0.186366503869, 1.33466666667, 0.186366503869, 0.00976658526801, -0.520033170536, -0.343433251935, 0.186366503869, 1.33466666667, 0.00976658526801, -0.520033170536, -0.520033170536, 
0.00976658526801, 1.33466666667, 0.186366503869, -0.520033170536, -0.343433251935, -0.343433251935, -0.520033170536, 0.00976658526801, 0.186366503869, 1.33466666667, 0.186366503869, 0.00976658526801, -0.520033170536, -0.343433251935, -0.343433251935, -0.520033170536, 0.00976658526801, 0.186366503869, 1.33466666667, 0.186366503869, 0.00976658526801, -0.520033170536, -0.343433251935, -0.343433251935, -0.520033170536, 0.186366503869, 1.33466666667, 0.00976658526801, -0.520033170536, -0.520033170536, 0.00976658526801, 1.33466666667, 0.186366503869, -0.520033170536, -0.343433251935, -0.343433251935, -0.520033170536, 0.00976658526801, 0.186366503869, 1.33466666667, 0.186366503869, 0.00976658526801, -0.520033170536, -0.343433251935, -0.343433251935, -0.520033170536, 0.00976658526801, 0.186366503869, 1.33466666667, 0.186366503869, 0.00976658526801, -0.520033170536, -0.343433251935, -0.343433251935, -0.520033170536, 0.186366503869, 1.33466666667, 0.00976658526801, -0.520033170536, -0.520033170536, 0.00976658526801, 1.33466666667, 0.186366503869, -0.343433251935, -0.520033170536, 0.00976658526801, 0.186366503869, 1.33466666667, 0.186366503869, -0.343433251935, -0.520033170536, 0.00976658526801, 0.186366503869, 1.33466666667, 0.186366503869, -0.343433251935, -0.520033170536, 0.186366503869, 1.33466666667}; - - for (int i = 0; i < 100; i++) - { - int row = rows[i]; - int col = cols[i]; - A_python[row*16 + col] = data[i]; - } - - - for (int i = 0; i < A->n_rows; i++) - { - int row_start = A->idx1[i]; - int row_end = A->idx1[i+1]; - for (int j = row_start; j < row_end; j++) - { - int col = A->idx2[j]; - A_dense[i*16 + col] = A->vals[j]; - } - } - - for (int i = 0; i < 16; i++) - { - for (int j = 0; j < 16; j++) - { - ASSERT_NEAR(A_python[i*16+j], A_dense[i*16+j], 1e-06); - } - } - -} // end of TEST(StencilTest, TestsInGallery)// - diff --git a/raptor/krylov/bicgstab.hpp b/raptor/krylov/bicgstab.hpp index a0487fea..473a851e 100644 --- a/raptor/krylov/bicgstab.hpp +++ 
b/raptor/krylov/bicgstab.hpp @@ -3,9 +3,7 @@ #include -#include "raptor/core/types.hpp" -#include "raptor/core/matrix.hpp" -#include "raptor/core/vector.hpp" +#include "raptor-sparse.hpp" namespace raptor { diff --git a/raptor/krylov/cg.hpp b/raptor/krylov/cg.hpp index 737d64ac..73ae66fe 100644 --- a/raptor/krylov/cg.hpp +++ b/raptor/krylov/cg.hpp @@ -3,9 +3,7 @@ #include -#include "raptor/core/types.hpp" -#include "raptor/core/matrix.hpp" -#include "raptor/core/vector.hpp" +#include "raptor-sparse.hpp" namespace raptor { diff --git a/raptor/krylov/par_bicgstab.hpp b/raptor/krylov/par_bicgstab.hpp index 64652cb6..af4f4120 100644 --- a/raptor/krylov/par_bicgstab.hpp +++ b/raptor/krylov/par_bicgstab.hpp @@ -3,9 +3,7 @@ #include -#include "raptor/core/types.hpp" -#include "raptor/core/par_matrix.hpp" -#include "raptor/core/par_vector.hpp" +#include "raptor-sparse.hpp" #include "raptor/multilevel/par_multilevel.hpp" #include "raptor/aggregation/par_smoothed_aggregation_solver.hpp" diff --git a/raptor/krylov/par_cg.hpp b/raptor/krylov/par_cg.hpp index 5a667671..e9e53381 100644 --- a/raptor/krylov/par_cg.hpp +++ b/raptor/krylov/par_cg.hpp @@ -3,9 +3,7 @@ #include -#include "raptor/core/types.hpp" -#include "raptor/core/par_matrix.hpp" -#include "raptor/core/par_vector.hpp" +#include "raptor-sparse.hpp" #include "raptor/multilevel/par_multilevel.hpp" namespace raptor { diff --git a/raptor/krylov/partial_inner.hpp b/raptor/krylov/partial_inner.hpp index 36f7ba02..98f543b8 100644 --- a/raptor/krylov/partial_inner.hpp +++ b/raptor/krylov/partial_inner.hpp @@ -3,8 +3,7 @@ #include -#include "raptor/core/types.hpp" -#include "raptor/core/par_vector.hpp" +#include "raptor-sparse.hpp" namespace raptor { diff --git a/raptor/multilevel/level.hpp b/raptor/multilevel/level.hpp index 174a9f95..757345eb 100644 --- a/raptor/multilevel/level.hpp +++ b/raptor/multilevel/level.hpp @@ -3,9 +3,7 @@ #ifndef RAPTOR_ML_LEVEL_H #define RAPTOR_ML_LEVEL_H -#include "raptor/core/types.hpp" 
-#include "raptor/core/matrix.hpp" -#include "raptor/core/vector.hpp" +#include "raptor-sparse.hpp" // Coarse Matrices (A) are CSC // Prolongation Matrices (P) are CSC diff --git a/raptor/multilevel/multilevel.hpp b/raptor/multilevel/multilevel.hpp index 97f578d3..c5bc7a8e 100644 --- a/raptor/multilevel/multilevel.hpp +++ b/raptor/multilevel/multilevel.hpp @@ -3,11 +3,9 @@ #ifndef RAPTOR_ML_MULTILEVEL_H #define RAPTOR_ML_MULTILEVEL_H -#include "raptor/core/types.hpp" -#include "raptor/core/matrix.hpp" -#include "raptor/core/vector.hpp" +#include "raptor-sparse.hpp" #include "level.hpp" -#include "raptor/util/linalg/relax.hpp" +#include "raptor/precondition/relax.hpp" // Coarse Matrices (A) are CSC // Prolongation Matrices (P) are CSC diff --git a/raptor/multilevel/par_level.hpp b/raptor/multilevel/par_level.hpp index a116bb38..53428e40 100644 --- a/raptor/multilevel/par_level.hpp +++ b/raptor/multilevel/par_level.hpp @@ -3,9 +3,7 @@ #ifndef RAPTOR_ML_PARLEVEL_H #define RAPTOR_ML_PARLEVEL_H -#include "raptor/core/types.hpp" -#include "raptor/core/par_matrix.hpp" -#include "raptor/core/par_vector.hpp" +#include "raptor-sparse.hpp" // Coarse Matrices (A) are CSR // Prolongation Matrices (P) are CSR diff --git a/raptor/multilevel/par_multilevel.hpp b/raptor/multilevel/par_multilevel.hpp index a55db77d..be4ffc5d 100644 --- a/raptor/multilevel/par_multilevel.hpp +++ b/raptor/multilevel/par_multilevel.hpp @@ -3,11 +3,9 @@ #ifndef RAPTOR_ML_PARMULTILEVEL_H #define RAPTOR_ML_PARMULTILEVEL_H -#include "raptor/core/types.hpp" -#include "raptor/core/par_matrix.hpp" -#include "raptor/core/par_vector.hpp" +#include "raptor-sparse.hpp" #include "raptor/multilevel/par_level.hpp" -#include "raptor/util/linalg/par_relax.hpp" +#include "raptor/precondition/par_relax.hpp" #include "raptor/ruge_stuben/par_interpolation.hpp" #include "raptor/ruge_stuben/par_cf_splitting.hpp" diff --git a/raptor/util/CMakeLists.txt b/raptor/precondition/CMakeLists 2.txt similarity index 100% rename from 
raptor/util/CMakeLists.txt rename to raptor/precondition/CMakeLists 2.txt diff --git a/raptor/precondition/CMakeLists.txt b/raptor/precondition/CMakeLists.txt new file mode 100644 index 00000000..f4c3f827 --- /dev/null +++ b/raptor/precondition/CMakeLists.txt @@ -0,0 +1,37 @@ +# Include the directory itself as a path to include directories +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +#Create a variable called linalg_SOURCES containing all .cpp files: + +if (WITH_MPI) + set(par_precond_HEADERS + precondition/par_relax.hpp + precondition/par_diag_scale.hpp + ) + set(par_precond_SOURCES + precondition/par_relax.cpp + precondition/par_diag_scale.cpp + ) +else () + set(par_precond_HEADERS + "" + ) + set (par_precond_SOURCES + "" + ) +endif() + +set(precond_HEADERS + precondition/relax.hpp + ${par_precond_HEADERS} + PARENT_SCOPE + ) +set(precond_SOURCES + precondition/relax.cpp + ${par_precond_SOURCES} + PARENT_SCOPE + ) + + + + diff --git a/raptor/util/linalg/par_diag_scale.cpp b/raptor/precondition/par_diag_scale.cpp similarity index 100% rename from raptor/util/linalg/par_diag_scale.cpp rename to raptor/precondition/par_diag_scale.cpp diff --git a/raptor/util/linalg/par_diag_scale.hpp b/raptor/precondition/par_diag_scale.hpp similarity index 86% rename from raptor/util/linalg/par_diag_scale.hpp rename to raptor/precondition/par_diag_scale.hpp index f734118d..ec896328 100644 --- a/raptor/util/linalg/par_diag_scale.hpp +++ b/raptor/precondition/par_diag_scale.hpp @@ -6,8 +6,7 @@ #include #include -#include "raptor/core/par_vector.hpp" -#include "raptor/core/par_matrix.hpp" +#include "raptor-sparse.hpp" namespace raptor { diff --git a/raptor/util/linalg/par_relax.cpp b/raptor/precondition/par_relax.cpp similarity index 99% rename from raptor/util/linalg/par_relax.cpp rename to raptor/precondition/par_relax.cpp index 2cc253a8..f22b26b0 100644 --- a/raptor/util/linalg/par_relax.cpp +++ b/raptor/precondition/par_relax.cpp @@ -1,9 +1,7 @@ // Copyright (c) 2015-2017, RAPtor 
Developer Team // License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#include "raptor/core/types.hpp" #include "par_relax.hpp" -#include "raptor/core/par_matrix.hpp" namespace raptor { // Declare Private Methods diff --git a/raptor/util/linalg/par_relax.hpp b/raptor/precondition/par_relax.hpp similarity index 90% rename from raptor/util/linalg/par_relax.hpp rename to raptor/precondition/par_relax.hpp index 24cfc8f9..90430f68 100644 --- a/raptor/util/linalg/par_relax.hpp +++ b/raptor/precondition/par_relax.hpp @@ -6,8 +6,7 @@ #include #include -#include "raptor/core/par_vector.hpp" -#include "raptor/core/par_matrix.hpp" +#include "raptor-sparse.hpp" #include "raptor/multilevel/par_level.hpp" namespace raptor { diff --git a/raptor/util/linalg/relax.cpp b/raptor/precondition/relax.cpp similarity index 98% rename from raptor/util/linalg/relax.cpp rename to raptor/precondition/relax.cpp index b3942a37..58a31d2e 100644 --- a/raptor/util/linalg/relax.cpp +++ b/raptor/precondition/relax.cpp @@ -1,8 +1,6 @@ // Copyright (c) 2015-2017, RAPtor Developer Team // License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#include "raptor/core/types.hpp" -#include "raptor/core/matrix.hpp" -#include "raptor/core/vector.hpp" + #include "relax.hpp" extern "C" { diff --git a/raptor/util/linalg/relax.hpp b/raptor/precondition/relax.hpp similarity index 94% rename from raptor/util/linalg/relax.hpp rename to raptor/precondition/relax.hpp index 6f87fc37..50b04ff3 100644 --- a/raptor/util/linalg/relax.hpp +++ b/raptor/precondition/relax.hpp @@ -4,8 +4,7 @@ #define RAPTOR_UTILS_LINALG_SEQ_RELAX_H #include -#include "raptor/core/vector.hpp" -#include "raptor/core/matrix.hpp" +#include "raptor-sparse.hpp" #include "raptor/multilevel/level.hpp" #include diff --git a/raptor/precondition/tests/CMakeLists.txt b/raptor/precondition/tests/CMakeLists.txt new file mode 100644 index 00000000..08abb192 --- /dev/null +++ b/raptor/precondition/tests/CMakeLists.txt @@ -0,0 
+1,49 @@ +add_executable(test_jacobi_aniso test_jacobi_aniso.cpp) +target_link_libraries(test_jacobi_aniso raptor ${MPI_LIBRARIES} googletest pthread ) +add_test(AnisoJacobiTest ./test_jacobi_aniso) + +add_executable(test_jacobi_laplacian test_jacobi_laplacian.cpp) +target_link_libraries(test_jacobi_laplacian raptor ${MPI_LIBRARIES} googletest pthread ) +add_test(LaplaceJacobiTest ./test_jacobi_laplacian) + +add_executable(test_gs_aniso test_gs_aniso.cpp) +target_link_libraries(test_gs_aniso raptor ${MPI_LIBRARIES} googletest pthread ) +add_test(AnisoGSTest ./test_gs_aniso) + +add_executable(test_gs_laplacian test_gs_laplacian.cpp) +target_link_libraries(test_gs_laplacian raptor ${MPI_LIBRARIES} googletest pthread ) +add_test(LaplaceGSTest ./test_gs_laplacian) + +# CANNOT CURRENTLY RUN THESE TESTS, BUT RAPTOR SEEMS CORRECT +# TODO : UNCOMMENT WHEN PYAMG BUG IS FIXED +# +#add_executable(test_sor_aniso test_sor_aniso.cpp) +#target_link_libraries(test_sor_aniso raptor ${MPI_LIBRARIES} googletest pthread ) +#add_test(AnisoSORTest ./test_sor_aniso) +# +#add_executable(test_sor_laplacian test_sor_laplacian.cpp) +#target_link_libraries(test_sor_laplacian raptor ${MPI_LIBRARIES} googletest pthread ) +#add_test(LaplaceSORTest ./test_sor_laplacian) + +add_executable(test_bsr_jacobi_aniso test_bsr_jacobi_aniso.cpp) +target_link_libraries(test_bsr_jacobi_aniso raptor ${MPI_LIBRARIES} googletest pthread ) +add_test(BSRAnisoJacobiTest ./test_bsr_jacobi_aniso) + +add_executable(test_bsr_jacobi_laplacian test_bsr_jacobi_laplacian.cpp) +target_link_libraries(test_bsr_jacobi_laplacian raptor ${MPI_LIBRARIES} googletest pthread ) +add_test(BSRLaplaceJacobiTest ./test_bsr_jacobi_laplacian) + +add_executable(test_bsr_gs_aniso test_bsr_gs_aniso.cpp)
+target_link_libraries(test_bsr_gs_aniso raptor ${MPI_LIBRARIES} googletest pthread ) +add_test(BSRAnisoGSTest ./test_bsr_gs_aniso) + +add_executable(test_bsr_gs_laplacian test_bsr_gs_laplacian.cpp) +target_link_libraries(test_bsr_gs_laplacian raptor ${MPI_LIBRARIES} googletest pthread ) +add_test(BSRLaplaceGSTest ./test_bsr_gs_laplacian) + +if (WITH_MPI) + #TODO Add Parallel Relaxation Tests Here + + # TODO Add Parallel Diagonal Scale Tests Here +endif() + diff --git a/raptor/core/tests/README.md b/raptor/precondition/tests/README.md similarity index 100% rename from raptor/core/tests/README.md rename to raptor/precondition/tests/README.md diff --git a/raptor/util/tests/test_bsr_gs_aniso.cpp b/raptor/precondition/tests/test_bsr_gs_aniso.cpp similarity index 100% rename from raptor/util/tests/test_bsr_gs_aniso.cpp rename to raptor/precondition/tests/test_bsr_gs_aniso.cpp diff --git a/raptor/util/tests/test_bsr_gs_laplacian.cpp b/raptor/precondition/tests/test_bsr_gs_laplacian.cpp similarity index 100% rename from raptor/util/tests/test_bsr_gs_laplacian.cpp rename to raptor/precondition/tests/test_bsr_gs_laplacian.cpp diff --git a/raptor/util/tests/test_bsr_jacobi_aniso.cpp b/raptor/precondition/tests/test_bsr_jacobi_aniso.cpp similarity index 100% rename from raptor/util/tests/test_bsr_jacobi_aniso.cpp rename to raptor/precondition/tests/test_bsr_jacobi_aniso.cpp diff --git a/raptor/util/tests/test_bsr_jacobi_laplacian.cpp b/raptor/precondition/tests/test_bsr_jacobi_laplacian.cpp similarity index 100% rename from raptor/util/tests/test_bsr_jacobi_laplacian.cpp rename to raptor/precondition/tests/test_bsr_jacobi_laplacian.cpp diff --git a/raptor/util/tests/test_bsr_spmv_aniso.cpp b/raptor/precondition/tests/test_bsr_spmv_aniso.cpp similarity index 100% rename from raptor/util/tests/test_bsr_spmv_aniso.cpp rename to raptor/precondition/tests/test_bsr_spmv_aniso.cpp diff --git a/raptor/util/tests/test_bsr_spmv_laplacian.cpp 
b/raptor/precondition/tests/test_bsr_spmv_laplacian.cpp similarity index 100% rename from raptor/util/tests/test_bsr_spmv_laplacian.cpp rename to raptor/precondition/tests/test_bsr_spmv_laplacian.cpp diff --git a/raptor/util/tests/test_bsr_spmv_random.cpp b/raptor/precondition/tests/test_bsr_spmv_random.cpp similarity index 100% rename from raptor/util/tests/test_bsr_spmv_random.cpp rename to raptor/precondition/tests/test_bsr_spmv_random.cpp diff --git a/raptor/util/tests/test_gs_aniso.cpp b/raptor/precondition/tests/test_gs_aniso.cpp similarity index 100% rename from raptor/util/tests/test_gs_aniso.cpp rename to raptor/precondition/tests/test_gs_aniso.cpp diff --git a/raptor/util/tests/test_gs_laplacian.cpp b/raptor/precondition/tests/test_gs_laplacian.cpp similarity index 100% rename from raptor/util/tests/test_gs_laplacian.cpp rename to raptor/precondition/tests/test_gs_laplacian.cpp diff --git a/raptor/util/tests/test_jacobi_aniso.cpp b/raptor/precondition/tests/test_jacobi_aniso.cpp similarity index 100% rename from raptor/util/tests/test_jacobi_aniso.cpp rename to raptor/precondition/tests/test_jacobi_aniso.cpp diff --git a/raptor/util/tests/test_jacobi_laplacian.cpp b/raptor/precondition/tests/test_jacobi_laplacian.cpp similarity index 100% rename from raptor/util/tests/test_jacobi_laplacian.cpp rename to raptor/precondition/tests/test_jacobi_laplacian.cpp diff --git a/raptor/util/tests/test_par_add.cpp b/raptor/precondition/tests/test_par_add.cpp similarity index 100% rename from raptor/util/tests/test_par_add.cpp rename to raptor/precondition/tests/test_par_add.cpp diff --git a/raptor/util/tests/test_par_scale_aniso.cpp b/raptor/precondition/tests/test_par_scale_aniso.cpp similarity index 100% rename from raptor/util/tests/test_par_scale_aniso.cpp rename to raptor/precondition/tests/test_par_scale_aniso.cpp diff --git a/raptor/util/tests/test_par_spmv_aniso.cpp b/raptor/precondition/tests/test_par_spmv_aniso.cpp similarity index 100% rename from 
raptor/util/tests/test_par_spmv_aniso.cpp rename to raptor/precondition/tests/test_par_spmv_aniso.cpp diff --git a/raptor/util/tests/test_par_spmv_laplacian.cpp b/raptor/precondition/tests/test_par_spmv_laplacian.cpp similarity index 100% rename from raptor/util/tests/test_par_spmv_laplacian.cpp rename to raptor/precondition/tests/test_par_spmv_laplacian.cpp diff --git a/raptor/util/tests/test_par_spmv_random.cpp b/raptor/precondition/tests/test_par_spmv_random.cpp similarity index 100% rename from raptor/util/tests/test_par_spmv_random.cpp rename to raptor/precondition/tests/test_par_spmv_random.cpp diff --git a/raptor/util/tests/test_parmetis.cpp b/raptor/precondition/tests/test_parmetis.cpp similarity index 100% rename from raptor/util/tests/test_parmetis.cpp rename to raptor/precondition/tests/test_parmetis.cpp diff --git a/raptor/util/tests/test_ptscotch.cpp b/raptor/precondition/tests/test_ptscotch.cpp similarity index 100% rename from raptor/util/tests/test_ptscotch.cpp rename to raptor/precondition/tests/test_ptscotch.cpp diff --git a/raptor/util/tests/test_repartition.cpp b/raptor/precondition/tests/test_repartition.cpp similarity index 100% rename from raptor/util/tests/test_repartition.cpp rename to raptor/precondition/tests/test_repartition.cpp diff --git a/raptor/util/tests/test_sor_aniso.cpp b/raptor/precondition/tests/test_sor_aniso.cpp similarity index 100% rename from raptor/util/tests/test_sor_aniso.cpp rename to raptor/precondition/tests/test_sor_aniso.cpp diff --git a/raptor/util/tests/test_sor_laplacian.cpp b/raptor/precondition/tests/test_sor_laplacian.cpp similarity index 100% rename from raptor/util/tests/test_sor_laplacian.cpp rename to raptor/precondition/tests/test_sor_laplacian.cpp diff --git a/raptor/util/tests/test_spmv_aniso.cpp b/raptor/precondition/tests/test_spmv_aniso.cpp similarity index 100% rename from raptor/util/tests/test_spmv_aniso.cpp rename to raptor/precondition/tests/test_spmv_aniso.cpp diff --git 
a/raptor/util/tests/test_spmv_laplacian.cpp b/raptor/precondition/tests/test_spmv_laplacian.cpp similarity index 100% rename from raptor/util/tests/test_spmv_laplacian.cpp rename to raptor/precondition/tests/test_spmv_laplacian.cpp diff --git a/raptor/util/tests/test_spmv_random.cpp b/raptor/precondition/tests/test_spmv_random.cpp similarity index 100% rename from raptor/util/tests/test_spmv_random.cpp rename to raptor/precondition/tests/test_spmv_random.cpp diff --git a/raptor/util/tests/test_tap_spmv_aniso.cpp b/raptor/precondition/tests/test_tap_spmv_aniso.cpp similarity index 100% rename from raptor/util/tests/test_tap_spmv_aniso.cpp rename to raptor/precondition/tests/test_tap_spmv_aniso.cpp diff --git a/raptor/util/tests/test_tap_spmv_laplacian.cpp b/raptor/precondition/tests/test_tap_spmv_laplacian.cpp similarity index 100% rename from raptor/util/tests/test_tap_spmv_laplacian.cpp rename to raptor/precondition/tests/test_tap_spmv_laplacian.cpp diff --git a/raptor/util/tests/test_tap_spmv_random.cpp b/raptor/precondition/tests/test_tap_spmv_random.cpp similarity index 100% rename from raptor/util/tests/test_tap_spmv_random.cpp rename to raptor/precondition/tests/test_tap_spmv_random.cpp diff --git a/raptor/profiling/profile_comm.cpp b/raptor/profiling/profile_comm.cpp index da4da2ed..74e956aa 100644 --- a/raptor/profiling/profile_comm.cpp +++ b/raptor/profiling/profile_comm.cpp @@ -1,4 +1,4 @@ -#include "raptor/core/par_matrix.hpp" +#include "raptor-sparse.hpp" using namespace raptor; #define short_cutoff 500 diff --git a/raptor/raptor.hpp b/raptor/raptor.hpp index 23590b98..967a20e3 100644 --- a/raptor/raptor.hpp +++ b/raptor/raptor.hpp @@ -3,48 +3,7 @@ #ifndef RAPTOR_HPP #define RAPTOR_HPP -// Define types such as int and double sizes -#include "core/types.hpp" -#include "core/utilities.hpp" - -// Data about topology and matrix partitions -#ifndef NO_MPI - #include "core/mpi_types.hpp" - #include "core/partition.hpp" - #include "core/topology.hpp" -#endif - 
-// Matrix and vector classes -#include "core/matrix.hpp" -#include "core/vector.hpp" -#ifndef NO_MPI - #include "core/par_matrix.hpp" - #include "core/par_vector.hpp" -#endif - -// Communication classes -#ifndef NO_MPI - #include "core/comm_data.hpp" - #include "core/comm_pkg.hpp" -#endif - -// Stencil and diffusion classes -#include "gallery/laplacian27pt.hpp" -#include "gallery/diffusion.hpp" -#include "gallery/stencil.hpp" -#include "gallery/random.hpp" -#ifndef NO_MPI - #include "gallery/par_stencil.hpp" - #include "gallery/par_random.hpp" -#endif - -// Matrix IO -#include "gallery/matrix_IO.hpp" -#include "gallery/matrix_market.hpp" -#ifndef NO_MPI - #include "gallery/par_matrix_IO.hpp" - #include "gallery/par_matrix_market.hpp" -#endif +#include "raptor-sparse.hpp" // External #ifdef USING_HYPRE @@ -96,25 +55,14 @@ #include "krylov/par_bicgstab.hpp" // Relaxation methods -#include "util/linalg/relax.hpp" +#include "precondition/relax.hpp" #ifndef NO_MPI - #include "util/linalg/par_relax.hpp" -#endif - -// Repartitioning matrix methods -#ifndef NO_MPI -#include "util/linalg/repartition.hpp" -#endif -#ifdef USING_PTSCOTCH - #include "util/linalg/external/ptscotch_wrapper.hpp" -#endif -#ifdef USING_PARMETIS - #include "util/linalg/external/parmetis_wrapper.hpp" + #include "precondition/par_relax.hpp" #endif // Preconditioning Methods #ifndef NO_MPI - #include "util/linalg/par_diag_scale.hpp" + #include "precondition/par_diag_scale.hpp" #endif diff --git a/raptor/ruge_stuben/cf_splitting.hpp b/raptor/ruge_stuben/cf_splitting.hpp index 2d478fa5..93372f81 100644 --- a/raptor/ruge_stuben/cf_splitting.hpp +++ b/raptor/ruge_stuben/cf_splitting.hpp @@ -3,8 +3,7 @@ #ifndef RAPTOR_SPLITTING_HPP #define RAPTOR_SPLITTING_HPP -#include "raptor/core/types.hpp" -#include "raptor/core/matrix.hpp" +#include "raptor-sparse.hpp" namespace raptor { diff --git a/raptor/ruge_stuben/interpolation.cpp b/raptor/ruge_stuben/interpolation.cpp index a1a3501b..34596e52 100644 --- 
a/raptor/ruge_stuben/interpolation.cpp +++ b/raptor/ruge_stuben/interpolation.cpp @@ -1,7 +1,6 @@ // Copyright (c) 2015-2017, RAPtor Developer Team // License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause #include "assert.h" -#include "raptor/core/types.hpp" #include "interpolation.hpp" namespace raptor { diff --git a/raptor/ruge_stuben/interpolation.hpp b/raptor/ruge_stuben/interpolation.hpp index e9d24392..67a1aa1f 100644 --- a/raptor/ruge_stuben/interpolation.hpp +++ b/raptor/ruge_stuben/interpolation.hpp @@ -3,8 +3,7 @@ #ifndef RAPTOR_DIRECT_INTERPOLATION_HPP #define RAPTOR_DIRECT_INTERPOLATION_HPP -#include "raptor/core/types.hpp" -#include "raptor/core/matrix.hpp" +#include "raptor-sparse.hpp" namespace raptor { diff --git a/raptor/ruge_stuben/par_cf_splitting.hpp b/raptor/ruge_stuben/par_cf_splitting.hpp index 420b375c..a44ebffd 100644 --- a/raptor/ruge_stuben/par_cf_splitting.hpp +++ b/raptor/ruge_stuben/par_cf_splitting.hpp @@ -3,8 +3,7 @@ #ifndef RAPTOR_PAR_SPLITTING_HPP #define RAPTOR_PAR_SPLITTING_HPP -#include "raptor/core/types.hpp" -#include "raptor/core/par_matrix.hpp" +#include "raptor-sparse.hpp" #include "cf_splitting.hpp" namespace raptor { diff --git a/raptor/ruge_stuben/par_interpolation.cpp b/raptor/ruge_stuben/par_interpolation.cpp index a77a5355..c79ba180 100644 --- a/raptor/ruge_stuben/par_interpolation.cpp +++ b/raptor/ruge_stuben/par_interpolation.cpp @@ -1,8 +1,7 @@ // Copyright (c) 2015-2017, RAPtor Developer Team // License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause #include "assert.h" -#include "raptor/core/types.hpp" -#include "raptor/core/par_matrix.hpp" +#include "par_interpolation.hpp" namespace raptor { diff --git a/raptor/ruge_stuben/par_interpolation.hpp b/raptor/ruge_stuben/par_interpolation.hpp index ac546954..0e29b935 100644 --- a/raptor/ruge_stuben/par_interpolation.hpp +++ b/raptor/ruge_stuben/par_interpolation.hpp @@ -3,8 +3,7 @@ #ifndef RAPTOR_PAR_DIRECT_INTERPOLATION_HPP #define 
RAPTOR_PAR_DIRECT_INTERPOLATION_HPP -#include "raptor/core/types.hpp" -#include "raptor/core/par_matrix.hpp" +#include "raptor-sparse.hpp" namespace raptor { diff --git a/raptor/tests/compare.hpp b/raptor/tests/compare.hpp index bfb0cf10..fc6196b1 100644 --- a/raptor/tests/compare.hpp +++ b/raptor/tests/compare.hpp @@ -9,8 +9,7 @@ #ifndef RAPTOR_TEST_COMPARE_HPP #define RAPTOR_TEST_COMPARE_HPP -#include "raptor/core/types.hpp" -#include "raptor/core/matrix.hpp" +#include "raptor-sparse.hpp" namespace raptor { void compare(CSRMatrix* A, CSRMatrix* A_rap) diff --git a/raptor/tests/par_compare.hpp b/raptor/tests/par_compare.hpp index 6b1256e1..0e32869f 100644 --- a/raptor/tests/par_compare.hpp +++ b/raptor/tests/par_compare.hpp @@ -8,8 +8,7 @@ #ifndef RAPTOR_TEST_PAR_COMPARE_HPP #define RAPTOR_TEST_PAR_COMPARE_HPP -#include "raptor/core/types.hpp" -#include "raptor/core/par_matrix.hpp" +#include "raptor-sparse.hpp" namespace raptor { void compare(ParCSRMatrix* A, ParCSRMatrix* A_rap) diff --git a/raptor/util/linalg/CMakeLists.txt b/raptor/util/linalg/CMakeLists.txt deleted file mode 100644 index 39db5374..00000000 --- a/raptor/util/linalg/CMakeLists.txt +++ /dev/null @@ -1,46 +0,0 @@ -# Include the directory itself as a path to include directories -set(CMAKE_INCLUDE_CURRENT_DIR ON) - -#Create a variable called linalg_SOURCES containing all .cpp files: - -if (WITH_MPI) - set(par_linalg_HEADERS - util/linalg/repartition.hpp - util/linalg/par_relax.hpp - util/linalg/par_diag_scale.hpp - ) - set(par_linalg_SOURCES - util/linalg/par_spmv.cpp - util/linalg/par_matmult.cpp - util/linalg/par_add.cpp - util/linalg/par_relax.cpp - util/linalg/repartition.cpp - util/linalg/par_diag_scale.cpp - ) -else () - set(par_linalg_HEADERS - "" - ) - set (par_linalg_SOURCES - "" - ) -endif() - -set(linalg_HEADERS - util/linalg/relax.hpp - ${par_linalg_HEADERS} - ${external_linalg_HEADERS} - PARENT_SCOPE - ) -set(linalg_SOURCES - util/linalg/matmult.cpp - util/linalg/relax.cpp - 
util/linalg/add.cpp - util/linalg/spmv.cpp - ${par_linalg_SOURCES} - PARENT_SCOPE - ) - - - - diff --git a/raptor/util/linalg/add.cpp b/raptor/util/linalg/add.cpp deleted file mode 100644 index 765c91e5..00000000 --- a/raptor/util/linalg/add.cpp +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#include "assert.h" -#include "raptor/core/matrix.hpp" - -using namespace raptor; - -// TODO -- currently assumes partitions are the same -Matrix* Matrix::add(CSRMatrix* B, bool remove_dup) -{ - CSRMatrix* A = to_CSR(); - CSRMatrix* C = new CSRMatrix(n_rows, n_cols, 2*nnz); - A->add_append(B, C, remove_dup); - delete A; - return C; -} -void Matrix::add_append(CSRMatrix* B, CSRMatrix* C, bool remove_dup) -{ - CSRMatrix* A = to_CSR(); - A->add_append(B, C, remove_dup); - delete A; -} -Matrix* Matrix::subtract(CSRMatrix* B) -{ - CSRMatrix* A = to_CSR(); - CSRMatrix* C = A->subtract(B); - delete A; - return C; -} - - -CSRMatrix* CSRMatrix::add(CSRMatrix* B, bool remove_dup) -{ - CSRMatrix* C = new CSRMatrix(n_rows, n_cols, 2*nnz); - add_append(B, C, remove_dup); - return C; -} - -void CSRMatrix::add_append(CSRMatrix* B, CSRMatrix* C, bool remove_dup) -{ - int start, end; - - C->resize(n_rows, n_cols); - int C_nnz = nnz + B->nnz; - C->idx2.resize(C_nnz); - C->vals.resize(C_nnz); - - C_nnz = 0; - C->idx1[0] = 0; - for (int i = 0; i < n_rows; i++) - { - start = idx1[i]; - end = idx1[i+1]; - std::copy(idx2.begin() + start, - idx2.begin() + end, - C->idx2.begin() + C_nnz); - std::copy(vals.begin() + start, - vals.begin() + end, - C->vals.begin() + C_nnz); - C_nnz += (end - start); - - start = B->idx1[i]; - end = B->idx1[i+1]; - std::copy(B->idx2.begin() + start, - B->idx2.begin() + end, - C->idx2.begin() + C_nnz); - std::copy(B->vals.begin() + start, - B->vals.begin() + end, - C->vals.begin() + C_nnz); - C_nnz += (end - start); - - C->idx1[i+1] = C_nnz; - } - C->nnz = C_nnz; - 
C->sort(); - if (remove_dup) - C->remove_duplicates(); -} - -CSRMatrix* CSRMatrix::subtract(CSRMatrix* B) -{ - int start, end; - - assert(n_rows == B->n_rows); - assert(n_cols == B->n_cols); - - CSRMatrix* C = new CSRMatrix(n_rows, n_cols, 2*nnz); - C->idx1[0] = 0; - for (int i = 0; i < n_rows; i++) - { - start = idx1[i]; - end = idx1[i+1]; - for (int j = start; j < end; j++) - { - C->idx2.emplace_back(idx2[j]); - C->vals.emplace_back(vals[j]); - } - start = B->idx1[i]; - end = B->idx1[i+1]; - for (int j = start; j < end; j++) - { - C->idx2.emplace_back(B->idx2[j]); - C->vals.emplace_back(-B->vals[j]); - } - C->idx1[i+1] = C->idx2.size(); - } - C->nnz = C->idx2.size(); - C->sort(); - C->remove_duplicates(); - - return C; -} - - diff --git a/raptor/util/linalg/external/CMakeLists.txt b/raptor/util/linalg/external/CMakeLists.txt deleted file mode 100644 index 9bf557f3..00000000 --- a/raptor/util/linalg/external/CMakeLists.txt +++ /dev/null @@ -1,34 +0,0 @@ -# Include the directory itself as a path to include directories -set(CMAKE_INCLUDE_CURRENT_DIR ON) - -#Create a variable called linalg_SOURCES containing all .cpp files: - -if (WITH_PTSCOTCH) - set(ptscotch_linalg_HEADERS - util/linalg/external/ptscotch_wrapper.hpp - ) -else() - set(ptscotch_linalg_HEADERS - "" - ) -endif() - -if (WITH_PARMETIS) - set(parmetis_linalg_HEADERS - util/linalg/external/parmetis_wrapper.hpp - ) -else() - set(parmetis_linalg_HEADERS - "" - ) -endif() - -set(ext_linalg_HEADERS - ${ptscotch_linalg_HEADERS} - ${parmetis_linalg_HEADERS} - PARENT_SCOPE - ) - - - - diff --git a/raptor/util/linalg/external/parmetis_wrapper.hpp b/raptor/util/linalg/external/parmetis_wrapper.hpp deleted file mode 100644 index 6f04be6d..00000000 --- a/raptor/util/linalg/external/parmetis_wrapper.hpp +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#ifndef RAPTOR_GALLERY_PARMETIS_HPP -#define 
RAPTOR_GALLERY_PARMETIS_HPP - -#include "raptor/raptor.hpp" -#include "parmetis.h" - -using namespace raptor; - -int* parmetis_partition(ParCSRMatrix* A) -{ - int rank, num_procs; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - RAPtor_MPI_Comm_size(RAPtor_MPI_COMM_WORLD, &num_procs); - - int start, end; - int col, global_col; - - // ParMetis Partitioner Variables - RAPtor_MPI_Comm comm = RAPtor_MPI_COMM_WORLD; - - // How vertices of graph are distributed among processes; - // Array size num_procs+1 - // Range of vertices local to each processor - int* vtxdist = A->partition->first_cols.data(); - - // Local adjacency structure - std::vector xadj(A->local_num_rows+1); - std::vector adjncy(A->local_nnz); - xadj[0] = 0; - int nnz = 0; - for (int i = 0; i < A->local_num_rows; i++) - { - start = A->on_proc->idx1[i]; - end = A->on_proc->idx1[i+1]; - for (int j = start; j < end; j++) - { - col = A->on_proc->idx2[j]; - global_col = A->on_proc_column_map[col]; - adjncy[nnz++] = global_col; - } - - start = A->off_proc->idx1[i]; - end = A->off_proc->idx1[i+1]; - for (int j = start; j < end; j++) - { - col = A->off_proc->idx2[j]; - global_col = A->off_proc_column_map[col]; - adjncy[nnz++] = global_col; - } - - xadj[i+1] = nnz; - } - - // Weights of vertices and edges - int* vwgt = NULL; - int* adjwgt = NULL; - - // Is the graph weighted? 
- // 0 - No weighting - // 1 - Edges only - // 2 - Vertices only - // 3 - Both edges and vertices - int wgtflag = 0; - - // Numbering scheme - // 0 - Cstyle - // 1 - Fortran - int numflag = 0; - - // Number of weights that each vertex has; - int ncon = 1; - - // Number of sub-domains desired; - int nparts = num_procs; - - // Fraction of vertex weight distributed to each subdomain - // Array size ncon x nparts - // For balanced sub-domains, each part gets 1/nparts - std::vector tpwgts(nparts, 1.0/nparts); - - // Imbalance tolerance for each vertex weight - // Array size ncon - // Perfect balance: 1 - // Perfect imbalance: nparts - // Recommended: 1.05 - std::vector ubvec(1, 1.05); - - // Additional Options: - // Options[0] = 0 (default values) or 1 (specify options[1], options[2]) - // Options[1]: levels of info to be returned (0-default, 1-timing info) - // Options[2]: random number seed for routine - std::vector options(3, 0); - - // Return value: Number of edges that are cut by partitioning - int edgecut; - - // Return value: Array (size of local_num_rows) of partition for each row - int* part = NULL; - if (A->local_num_rows) - part = new int[A->local_num_rows]; - - int err = ParMETIS_V3_PartKway(vtxdist, xadj.data(), adjncy.data(), vwgt, adjwgt, - &wgtflag, &numflag, &ncon, &nparts, tpwgts.data(), ubvec.data(), options.data(), - &edgecut, part, &comm); - - return part; -} - -#endif diff --git a/raptor/util/linalg/external/ptscotch_wrapper.hpp b/raptor/util/linalg/external/ptscotch_wrapper.hpp deleted file mode 100644 index fd83bb2f..00000000 --- a/raptor/util/linalg/external/ptscotch_wrapper.hpp +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#ifndef RAPTOR_GALLERY_PTSCOTCH_HPP -#define RAPTOR_GALLERY_PTSCOTCH_HPP - -#include -#include "core/types.hpp" -#include "ptscotch.h" -#include -#include -#include "core/par_matrix.hpp" -#include - -using 
namespace raptor; - -int* ptscotch_partition(ParCSRMatrix* A) -{ - int rank, num_procs; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - RAPtor_MPI_Comm_size(RAPtor_MPI_COMM_WORLD, &num_procs); - - // Variables for Graph Partitioning - SCOTCH_Num* partition = new SCOTCH_Num[A->local_num_rows + 2]; - SCOTCH_Num baseval = 0; - SCOTCH_Num vertlocnbr = A->local_num_rows; - SCOTCH_Num vertlocmax = A->local_num_rows; - SCOTCH_Num* vertloctab = new SCOTCH_Num[vertlocnbr + 2]; - SCOTCH_Num* vendloctab = &vertloctab[1]; - SCOTCH_Num* veloloctab = NULL; - SCOTCH_Num* vlblloctab = NULL; - SCOTCH_Num edgelocnbr = A->local_nnz; - SCOTCH_Num edgelocsiz = A->local_nnz; - SCOTCH_Num* edgeloctab = new SCOTCH_Num[edgelocsiz + 1]; - SCOTCH_Num* edgegsttab = NULL; - SCOTCH_Num* edloloctab = NULL; - - int row_start, row_end; - int idx, gbl_idx, ctr; - int err; - - // Find matrix edge indices for PT Scotch - ctr = 0; - vertloctab[0] = 0; - for (int row = 0; row < A->local_num_rows; row++) - { - row_start = A->on_proc->idx1[row]; - row_end = A->on_proc->idx1[row+1]; - for (int j = row_start; j < row_end; j++) - { - idx = A->on_proc->idx2[j]; - if (idx == row) continue; - gbl_idx = A->on_proc_column_map[idx]; - edgeloctab[ctr] = gbl_idx; - ctr++; - } - - if (A->off_proc_num_cols) - { - row_start = A->off_proc->idx1[row]; - row_end = A->off_proc->idx1[row+1]; - for (int j = row_start; j < row_end; j++) - { - idx = A->off_proc->idx2[j]; - gbl_idx = A->off_proc_column_map[idx]; - edgeloctab[ctr] = gbl_idx; - ctr++; - } - } - vertloctab[row+1] = ctr; - } - edgelocnbr = ctr; - edgelocsiz = ctr; - - - SCOTCH_Dgraph dgraphdata; - SCOTCH_Strat stratdata; - SCOTCH_Arch archdata; - - RAPtor_MPI_Comm comm; - RAPtor_MPI_Comm_dup(RAPtor_MPI_COMM_WORLD, &comm); - - SCOTCH_dgraphInit(&dgraphdata, comm); - SCOTCH_dgraphBuild(&dgraphdata, baseval, vertlocnbr, vertlocmax, - vertloctab, vendloctab, veloloctab, vlblloctab, edgelocnbr, edgelocsiz, - edgeloctab, edgegsttab, edloloctab); - 
SCOTCH_dgraphCheck(&dgraphdata); - - SCOTCH_stratInit(&stratdata); - SCOTCH_dgraphPart(&dgraphdata, num_procs, &stratdata, partition); - - SCOTCH_stratExit(&stratdata); - SCOTCH_dgraphExit(&dgraphdata); - - delete[] vertloctab; - delete[] edgeloctab; - - RAPtor_MPI_Comm_free(&comm); - - return partition; -} - - -#endif - diff --git a/raptor/util/linalg/matmult.cpp b/raptor/util/linalg/matmult.cpp deleted file mode 100644 index 3552ae6d..00000000 --- a/raptor/util/linalg/matmult.cpp +++ /dev/null @@ -1,352 +0,0 @@ -#include "raptor/core/matrix.hpp" - -using namespace raptor; - -// Declare Private Methods -std::vector& form_new(const CSRMatrix* A, const CSRMatrix* B, - CSRMatrix** C_ptr, std::vector& A_vals); -std::vector& form_new(const CSRMatrix* A, const CSRMatrix* B, - CSRMatrix** C_ptr, std::vector& A_vals); -std::vector& form_new(const CSCMatrix* A, const CSRMatrix* B, - CSRMatrix** C_ptr, std::vector& A_vals); -std::vector& form_new(const CSCMatrix* A, const CSRMatrix* B, - CSRMatrix** C_ptr, std::vector& A_vals); -void init_sums(std::vector& sums, int size, int b_size); -void init_sums(std::vector& sums, int size, int b_size); -void zero_sum(double* sum, int b_size); -void zero_sum(double** sum, int b_size); -void finalize_sums(std::vector& sums); -void finalize_sums(std::vector& sums); - - -std::vector& form_new(const CSRMatrix* A, const CSRMatrix* B, - CSRMatrix** C_ptr, std::vector& A_vals) -{ - CSRMatrix* C = new CSRMatrix(A->n_rows, B->n_cols); - *C_ptr = C; - return C->vals; -} -std::vector& form_new(const CSRMatrix* A, const CSRMatrix* B, - CSRMatrix** C_ptr, std::vector& A_vals) -{ - BSRMatrix* C = new BSRMatrix(A->n_rows, B->n_cols, - A->b_rows, B->b_cols); - *C_ptr = C; - return C->block_vals; -} -std::vector& form_new(const CSCMatrix* A, const CSRMatrix* B, - CSRMatrix** C_ptr, std::vector& A_vals) -{ - CSRMatrix* C = new CSRMatrix(A->n_cols, B->n_cols); - *C_ptr = C; - return C->vals; -} -std::vector& form_new(const CSCMatrix* A, const CSRMatrix* 
B, - CSRMatrix** C_ptr, std::vector& A_vals) -{ - BSRMatrix* C = new BSRMatrix(A->n_cols, B->n_cols, - A->b_cols, B->b_cols); - *C_ptr = C; - return C->block_vals; -} - -void init_sums(std::vector& sums, int size, int b_size) -{ - sums.resize(size, 0); -} -void init_sums(std::vector& sums, int size, int b_size) -{ - for (int i = 0; i < size; i++) - { - sums.emplace_back(new double[b_size]); - for (int j = 0; j < b_size; j++) - sums[i][j] = 0.0; - } -} - -void zero_sum(double* sum, int b_size) -{ - *sum = 0; -} -void zero_sum(double** sum, int b_size) -{ - (*sum) = new double[b_size]; - for (int i = 0; i < b_size; i++) - (*sum)[i] = 0; -} - -void finalize_sums(std::vector& sums) -{ - return; -} -void finalize_sums(std::vector& sums) -{ - for (std::vector::iterator it = sums.begin(); - it != sums.end(); ++it) - delete[] *it; -} - -template -CSRMatrix* spgemm_helper(const CSRMatrix* A, const CSRMatrix* B, - std::vector& A_vals, std::vector& B_vals, - int* B_to_C = NULL) -{ - std::vector next(B->n_cols, -1); - std::vector sums; - init_sums(sums, B->n_cols, B->b_size); - - CSRMatrix* C = NULL; - std::vector& C_vals = form_new(A, B, &C, A_vals); - C->reserve_size(1.5*A->nnz); - - C->idx1[0] = 0; - for (int i = 0; i < A->n_rows; i++) - { - int head = -2; - int length = 0; - int row_start_A = A->idx1[i]; - int row_end_A = A->idx1[i+1]; - for (int j = row_start_A; j < row_end_A; j++) - { - int col_A = A->idx2[j]; - T val_A = A_vals[j]; - int row_start_B = B->idx1[col_A]; - int row_end_B = B->idx1[col_A+1]; - for (int k = row_start_B; k < row_end_B; k++) - { - int col_B = B->idx2[k]; - A->mult_vals(val_A, B_vals[k], &sums[col_B], - A->b_rows, B->b_cols, A->b_cols); - if (next[col_B] == -1) - { - next[col_B] = head; - head = col_B; - length++; - } - } - } - for (int j = 0; j < length; j++) - { - double val = A->abs_val(sums[head]); - if (val > zero_tol) - { - if (B_to_C) - { - C->idx2.emplace_back(B_to_C[head]); - } - else - { - C->idx2.emplace_back(head); - } - 
C_vals.emplace_back(sums[head]); - } - int tmp = head; - head = next[head]; - next[tmp] = -1; - zero_sum(&sums[tmp], A->b_size); - } - C->idx1[i+1] = C->idx2.size(); - } - C->nnz = C->idx2.size(); - - finalize_sums(sums); - - return C; -} - -template -CSRMatrix* spgemm_T_helper(const CSCMatrix* A, const CSRMatrix* B, - std::vector& A_vals, std::vector& B_vals, - int* C_map = NULL) -{ - CSRMatrix* C; - std::vector& C_vals = form_new(A, B, &C, A_vals); - C->reserve_size(1.5*B->nnz); - - std::vector next(B->n_cols, -1); - std::vector sums; - init_sums(sums, B->n_cols, A->b_size); - - C->idx1[0] = 0; - for (int i = 0; i < A->n_cols; i++) - { - int head = -2; - int length = 0; - int row_start_AT = A->idx1[i]; - int row_end_AT = A->idx1[i+1]; - for (int j = row_start_AT; j < row_end_AT; j++) - { - int col_AT = A->idx2[j]; - T val_AT = A_vals[j]; - int row_start = B->idx1[col_AT]; - int row_end = B->idx1[col_AT+1]; - for (int k = row_start; k < row_end; k++) - { - int col = B->idx2[k]; - A->mult_T_vals(val_AT, B_vals[k], &sums[col], - A->b_cols, B->b_cols, A->b_rows); - if (next[col] == -1) - { - next[col] = head; - head = col; - length++; - } - } - } - for (int j = 0; j < length; j++) - { - if (A->abs_val(sums[head]) > zero_tol) - { - if (C_map) - { - C->idx2.emplace_back(C_map[head]); - } - else - { - C->idx2.emplace_back(head); - } - C_vals.emplace_back(sums[head]); - } - int tmp = head; - head = next[head]; - next[tmp] = -1; - zero_sum(&sums[tmp], A->b_size); - } - C->idx1[i+1] = C->idx2.size(); - } - C->nnz = C->idx2.size(); - - finalize_sums(sums); - - return C; -} - - -CSRMatrix* Matrix::mult(CSRMatrix* B, int* B_to_C) -{ - return spgemm(B, B_to_C); -} -CSRMatrix* Matrix::mult(CSCMatrix* B, int* B_to_C) -{ - CSRMatrix* B_csr = B->to_CSR(); - CSRMatrix* C = spgemm(B_csr, B_to_C); - delete B_csr; - return C; -} -CSRMatrix* Matrix::mult(COOMatrix* B, int* B_to_C) -{ - CSRMatrix* B_csr = B->to_CSR(); - CSRMatrix* C = spgemm(B_csr, B_to_C); - delete B_csr; - return C; 
-} - -CSRMatrix* Matrix::mult_T(CSCMatrix* A, int* C_map) -{ - return spgemm_T(A, C_map); -} -CSRMatrix* Matrix::mult_T(CSRMatrix* A, int* C_map) -{ - CSCMatrix* A_csc = A->to_CSC(); - CSRMatrix* C = spgemm_T(A_csc, C_map); - delete A_csc; - return C; -} -CSRMatrix* Matrix::mult_T(COOMatrix* A, int* C_map) -{ - CSCMatrix* A_csc = A->to_CSC(); - CSRMatrix* C = spgemm_T(A_csc, C_map); - delete A_csc; - return C; -} - -CSRMatrix* CSRMatrix::spgemm(CSRMatrix* B, int* B_to_C) -{ - return spgemm_helper(this, B, vals, B->vals, B_to_C); -} -BSRMatrix* BSRMatrix::spgemm(CSRMatrix* B, int* B_to_C) -{ - BSRMatrix* B_bsr = (BSRMatrix*) B; - return (BSRMatrix*) spgemm_helper(this, B_bsr, block_vals, - B_bsr->block_vals, B_to_C); -} -CSRMatrix* COOMatrix::spgemm(CSRMatrix* B, int* B_to_C) -{ - CSRMatrix* A_csr = to_CSR(); - CSRMatrix* C = spgemm_helper(A_csr, B, A_csr->vals, B->vals, - B_to_C); - delete A_csr; - return C; -} -BSRMatrix* BCOOMatrix::spgemm(CSRMatrix* B, int* B_to_C) -{ - BSRMatrix* A_bsr = (BSRMatrix*) to_BSR(); - BSRMatrix* B_bsr = (BSRMatrix*) B; - BSRMatrix* C = (BSRMatrix*) spgemm_helper(A_bsr, B_bsr, - A_bsr->block_vals, B_bsr->block_vals, B_to_C); - delete A_bsr; - return C; -} -CSRMatrix* CSCMatrix::spgemm(CSRMatrix* B, int* B_to_C) -{ - CSRMatrix* A_csr = to_CSR(); - CSRMatrix* C = spgemm_helper(A_csr, B, A_csr->vals, B->vals, - B_to_C); - delete A_csr; - return C; -} -BSRMatrix* BSCMatrix::spgemm(CSRMatrix* B, int* B_to_C) -{ - BSRMatrix* A_bsr = (BSRMatrix*) to_BSR(); - BSRMatrix* B_bsr = (BSRMatrix*) B; - BSRMatrix* C = (BSRMatrix*) spgemm_helper(A_bsr, B_bsr, - A_bsr->block_vals, B_bsr->block_vals, B_to_C); - delete A_bsr; - return C; -} - - -CSRMatrix* CSRMatrix::spgemm_T(CSCMatrix* A, int* C_map) -{ - return spgemm_T_helper(A, this, A->vals, vals, C_map); -} -BSRMatrix* BSRMatrix::spgemm_T(CSCMatrix* A, int* C_map) -{ - BSCMatrix* A_bsc = (BSCMatrix*) A; - return (BSRMatrix*) spgemm_T_helper(A_bsc, this, - A_bsc->block_vals, block_vals, C_map); -} 
-CSRMatrix* COOMatrix::spgemm_T(CSCMatrix* A, int* C_map) -{ - CSRMatrix* B_csr = to_CSR(); - CSRMatrix* C = spgemm_T_helper(A, B_csr, A->vals, - B_csr->vals, C_map); - delete B_csr; - return C; -} -BSRMatrix* BCOOMatrix::spgemm_T(CSCMatrix* A, int* C_map) -{ - BSCMatrix* A_bsc = (BSCMatrix*) A; - BSRMatrix* B_bsr = (BSRMatrix*) to_BSR(); - BSRMatrix* C = (BSRMatrix*) spgemm_T_helper(A_bsc, B_bsr, - A_bsc->block_vals, B_bsr->block_vals, C_map); - delete B_bsr; - return C; -} -CSRMatrix* CSCMatrix::spgemm_T(CSCMatrix* A, int* C_map) -{ - CSRMatrix* B_csr = to_CSR(); - CSRMatrix* C = spgemm_T_helper(A, B_csr, A->vals, - B_csr->vals, C_map); - delete B_csr; - return C; -} -BSRMatrix* BSCMatrix::spgemm_T(CSCMatrix* A, int* C_map) -{ - BSCMatrix* A_bsc = (BSCMatrix*) A; - BSRMatrix* B_bsr = (BSRMatrix*) to_BSR(); - BSRMatrix* C = (BSRMatrix*) spgemm_T_helper(A_bsc, B_bsr, - A_bsc->block_vals, B_bsr->block_vals, C_map); - delete B_bsr; - return C; -} diff --git a/raptor/util/linalg/par_add.cpp b/raptor/util/linalg/par_add.cpp deleted file mode 100644 index 73caa115..00000000 --- a/raptor/util/linalg/par_add.cpp +++ /dev/null @@ -1,309 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#include "assert.h" -#include "raptor/core/par_matrix.hpp" - -using namespace raptor; - -// TODO -- currently assumes partitions are the same -ParMatrix* ParMatrix::add(ParCSRMatrix* B) -{ - return NULL; -} -ParMatrix* ParMatrix::subtract(ParCSRMatrix* B) -{ - return NULL; -} - -ParCSRMatrix* ParCSRMatrix::add(ParCSRMatrix* B) -{ - ParCSRMatrix* C = new ParCSRMatrix(partition, global_num_rows, global_num_cols, - local_num_rows, on_proc_num_cols, 0); - int start, end; - - std::vector off_proc_to_new; - std::vector B_off_proc_to_new; - if (off_proc_num_cols) off_proc_to_new.resize(off_proc_num_cols, 0); - if (B->off_proc_num_cols) B_off_proc_to_new.resize(B->off_proc_num_cols, 0); - - int ctr = 0; - int ctr_B = 
0; - int global_col = 0; - int global_col_B = 0; - while (ctr < off_proc_num_cols || ctr_B < B->off_proc_num_cols) - { - if (ctr < off_proc_num_cols) global_col = off_proc_column_map[ctr]; - else global_col = partition->global_num_cols; - - if (ctr_B < B->off_proc_num_cols) global_col_B = B->off_proc_column_map[ctr_B]; - else global_col_B = B->partition->global_num_cols; - - if (global_col == global_col_B) - { - off_proc_to_new[ctr++] = C->off_proc_column_map.size(); - B_off_proc_to_new[ctr_B++] = C->off_proc_column_map.size(); - C->off_proc_column_map.emplace_back(global_col); - } - else if (global_col < global_col_B) - { - off_proc_to_new[ctr++] = C->off_proc_column_map.size(); - C->off_proc_column_map.emplace_back(global_col); - } - else - { - B_off_proc_to_new[ctr_B++] = C->off_proc_column_map.size(); - C->off_proc_column_map.emplace_back(global_col_B); - } - } - C->off_proc_num_cols = C->off_proc_column_map.size(); - - - C->on_proc->idx1[0] = 0; - C->off_proc->idx1[0] = 0; - int on_nnz = on_proc->nnz + B->on_proc->nnz; - int off_nnz = off_proc->nnz + B->off_proc->nnz; - C->on_proc->idx2.resize(on_nnz); - C->on_proc->vals.resize(on_nnz); - C->off_proc->idx2.resize(off_nnz); - C->off_proc->vals.resize(off_nnz); - on_nnz = 0; - off_nnz = 0; - for (int i = 0; i < local_num_rows; i++) - { - // Add on_proc column indices and values - start = on_proc->idx1[i]; - end = on_proc->idx1[i+1]; - std::copy(on_proc->idx2.begin() + start, - on_proc->idx2.begin() + end, - C->on_proc->idx2.begin() + on_nnz); - std::copy(on_proc->vals.begin() + start, - on_proc->vals.begin() + end, - C->on_proc->vals.begin() + on_nnz); - on_nnz += (end - start); - - // Add on_proc columns and values from B - start = B->on_proc->idx1[i]; - end = B->on_proc->idx1[i+1]; - std::copy(B->on_proc->idx2.begin() + start, - B->on_proc->idx2.begin() + end, - C->on_proc->idx2.begin() + on_nnz); - std::copy(B->on_proc->vals.begin() + start, - B->on_proc->vals.begin() + end, - C->on_proc->vals.begin() + 
on_nnz); - on_nnz += (end - start); - - // Update rowptr - C->on_proc->idx1[i+1] = on_nnz; - - - // Add off_proc columns and values - start = off_proc->idx1[i]; - end = off_proc->idx1[i+1]; - std::copy(off_proc->idx2.begin() + start, - off_proc->idx2.begin() + end, - C->off_proc->idx2.begin() + off_nnz); - std::copy(off_proc->vals.begin() + start, - off_proc->vals.begin() + end, - C->off_proc->vals.begin() + off_nnz); - for (std::vector::iterator it = C->off_proc->idx2.begin() + off_nnz; - it != C->off_proc->idx2.begin() + off_nnz + (end - start); ++it) - { - *it = off_proc_to_new[*it]; - } - off_nnz += (end - start); - - // Add off_proc columns and values from B - start = B->off_proc->idx1[i]; - end = B->off_proc->idx1[i+1]; - std::copy(B->off_proc->idx2.begin() + start, - B->off_proc->idx2.begin() + end, - C->off_proc->idx2.begin() + off_nnz); - std::copy(B->off_proc->vals.begin() + start, - B->off_proc->vals.begin() + end, - C->off_proc->vals.begin() + off_nnz); - for (std::vector::iterator it = C->off_proc->idx2.begin() + off_nnz; - it != C->off_proc->idx2.begin() + off_nnz + (end - start); ++it) - { - *it = off_proc_to_new[*it]; - } - off_nnz += (end - start); - - // Update rowptr - C->off_proc->idx1[i+1] = off_nnz; - } - C->on_proc->nnz = C->on_proc->idx2.size(); - C->off_proc->nnz = C->off_proc->idx2.size(); - - C->on_proc_column_map.resize(on_proc_column_map.size()); - std::copy(on_proc_column_map.begin(), on_proc_column_map.end(), - C->on_proc_column_map.begin()); - C->local_row_map.resize(local_row_map.size()); - std::copy(local_row_map.begin(), local_row_map.end(), - C->local_row_map.begin()); - - C->on_proc->sort(); - C->on_proc->remove_duplicates(); - C->on_proc->move_diag(); - - C->off_proc->sort(); - C->off_proc->remove_duplicates(); - - if (C->off_proc_num_cols) - { - std::vector new_col(C->off_proc_num_cols, 0); - for (std::vector::iterator it = C->off_proc->idx2.begin(); - it != C->off_proc->idx2.end(); ++it) - { - new_col[*it] = 1; - } - ctr = 0; 
- for (int i = 0; i < C->off_proc_num_cols; i++) - { - if (new_col[i]) - new_col[i] = ctr++; - else - new_col[i] = -1; - } - C->off_proc_num_cols = ctr; - C->off_proc->n_cols = ctr; - C->off_proc_column_map.resize(ctr); - - for (std::vector::iterator it = C->off_proc->idx2.begin(); - it != C->off_proc->idx2.end(); ++it) - { - *it = new_col[*it]; - } - } - - C->local_nnz = C->on_proc->nnz + C->off_proc->nnz; - - return C; -} - - -ParCSRMatrix* ParCSRMatrix::subtract(ParCSRMatrix* B) -{ - ParCSRMatrix* C = new ParCSRMatrix(partition, global_num_rows, global_num_cols, - local_num_rows, on_proc_num_cols, 0); - int start, end; - - std::vector off_proc_to_new; - std::vector B_off_proc_to_new; - if (off_proc_num_cols) off_proc_to_new.resize(off_proc_num_cols, 0); - if (B->off_proc_num_cols) B_off_proc_to_new.resize(B->off_proc_num_cols, 0); - - int ctr = 0; - int ctr_B = 0; - int global_col = 0; - int global_col_B = 0; - while (ctr < off_proc_num_cols || ctr_B < B->off_proc_num_cols) - { - if (ctr < off_proc_num_cols) global_col = off_proc_column_map[ctr]; - else global_col = partition->global_num_cols; - - if (ctr_B < B->off_proc_num_cols) global_col_B = B->off_proc_column_map[ctr_B]; - else global_col_B = B->partition->global_num_cols; - - if (global_col == global_col_B) - { - off_proc_to_new[ctr++] = C->off_proc_column_map.size(); - B_off_proc_to_new[ctr_B++] = C->off_proc_column_map.size(); - C->off_proc_column_map.emplace_back(global_col); - } - else if (global_col < global_col_B) - { - off_proc_to_new[ctr++] = C->off_proc_column_map.size(); - C->off_proc_column_map.emplace_back(global_col); - } - else - { - B_off_proc_to_new[ctr_B++] = C->off_proc_column_map.size(); - C->off_proc_column_map.emplace_back(global_col_B); - } - } - C->off_proc_num_cols = C->off_proc_column_map.size(); - - - C->on_proc->idx1[0] = 0; - C->off_proc->idx1[0] = 0; - for (int i = 0; i < local_num_rows; i++) - { - start = on_proc->idx1[i]; - end = on_proc->idx1[i+1]; - for (int j = start; j < 
end; j++) - { - C->on_proc->idx2.emplace_back(on_proc->idx2[j]); - C->on_proc->vals.emplace_back(on_proc->vals[j]); - } - start = B->on_proc->idx1[i]; - end = B->on_proc->idx1[i+1]; - for (int j = start; j < end; j++) - { - C->on_proc->idx2.emplace_back(B->on_proc->idx2[j]); - C->on_proc->vals.emplace_back(-B->on_proc->vals[j]); - } - C->on_proc->idx1[i+1] = C->on_proc->idx2.size(); - - - start = off_proc->idx1[i]; - end = off_proc->idx1[i+1]; - for (int j = start; j < end; j++) - { - C->off_proc->idx2.emplace_back(off_proc_to_new[off_proc->idx2[j]]); - C->off_proc->vals.emplace_back(off_proc->vals[j]); - } - start = B->off_proc->idx1[i]; - end = B->off_proc->idx1[i+1]; - for (int j = start; j < end; j++) - { - C->off_proc->idx2.emplace_back(B_off_proc_to_new[B->off_proc->idx2[j]]); - C->off_proc->vals.emplace_back(-B->off_proc->vals[j]); - } - C->off_proc->idx1[i+1] = C->off_proc->idx2.size(); - } - C->on_proc->nnz = C->on_proc->idx2.size(); - C->off_proc->nnz = C->off_proc->idx2.size(); - - C->on_proc_column_map.resize(on_proc_column_map.size()); - std::copy(on_proc_column_map.begin(), on_proc_column_map.end(), C->on_proc_column_map.begin()); - C->local_row_map.resize(local_row_map.size()); - std::copy(local_row_map.begin(), local_row_map.end(), C->local_row_map.begin()); - - C->on_proc->sort(); - C->on_proc->remove_duplicates(); - C->on_proc->move_diag(); - - C->off_proc->sort(); - C->off_proc->remove_duplicates(); - - if (C->off_proc_num_cols) - { - std::vector new_col(C->off_proc_num_cols, 0); - for (std::vector::iterator it = C->off_proc->idx2.begin(); - it != C->off_proc->idx2.end(); ++it) - { - new_col[*it] = 1; - } - ctr = 0; - for (int i = 0; i < C->off_proc_num_cols; i++) - { - if (new_col[i]) - new_col[i] = ctr++; - else - new_col[i] = -1; - } - C->off_proc_num_cols = ctr; - C->off_proc->n_cols = ctr; - C->off_proc_column_map.resize(ctr); - - for (std::vector::iterator it = C->off_proc->idx2.begin(); - it != C->off_proc->idx2.end(); ++it) - { - *it = 
new_col[*it]; - } - } - - C->local_nnz = C->on_proc->nnz + C->off_proc->nnz; - - return C; -} diff --git a/raptor/util/linalg/par_matmult.cpp b/raptor/util/linalg/par_matmult.cpp deleted file mode 100644 index ae65e0f1..00000000 --- a/raptor/util/linalg/par_matmult.cpp +++ /dev/null @@ -1,563 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#include "raptor/core/par_matrix.hpp" - -using namespace raptor; - -// Declare Private Methods -ParCSRMatrix* init_mat(ParCSCMatrix* A); -ParCSRMatrix* init_mat(ParCSRMatrix* A); -ParBSRMatrix* init_mat(ParBSRMatrix* A); -ParBSRMatrix* init_mat(ParBSCMatrix* A); - -ParCSRMatrix* init_mat(ParCSCMatrix* A) -{ - return new ParCSRMatrix(A->partition); -} -ParCSRMatrix* init_mat(ParCSRMatrix* A) -{ - return new ParCSRMatrix(A->partition); -} -template -ParCSRMatrix* init_mat(ParCSRMatrix* A, T* B) -{ - Partition* part = new Partition(A->partition, B->partition); - ParCSRMatrix* C = new ParCSRMatrix(part); - part->num_shared = 0; - return C; -} -ParBSRMatrix* init_mat(ParBSRMatrix* A) -{ - return new ParBSRMatrix(A->partition, A->on_proc->b_rows, A->on_proc->b_cols); -} -ParBSRMatrix* init_mat(ParBSCMatrix* A) -{ - return new ParBSRMatrix(A->partition, A->on_proc->b_rows, A->on_proc->b_cols); -} -template -ParBSRMatrix* init_mat(ParBSRMatrix* A, T* B) -{ - Partition* part = new Partition(A->partition, B->partition); - ParBSRMatrix* C = new ParBSRMatrix(part, A->on_proc->b_rows, A->on_proc->b_cols); - part->num_shared = 0; - return C; -} -template -ParCSRMatrix* init_matrix(T* A, U* B) -{ - ParCSRMatrix* C; - - if (A->partition == B->partition) - { - C = init_mat(A); - } - else - { - if (A->partition->global_num_rows == B->partition->global_num_rows && - A->partition->local_num_rows == B->partition->local_num_rows && - A->partition->first_local_row == B->partition->first_local_row && - A->partition->last_local_row == B->partition->last_local_row) - { - 
C = init_mat(B); - } - else if (A->partition->global_num_cols == B->partition->global_num_cols && - A->partition->local_num_cols == B->partition->local_num_cols && - A->partition->first_local_col == B->partition->first_local_col && - A->partition->last_local_col == B->partition->last_local_col) - { - C = init_mat(A); - } - else - { - C = init_mat(A, B); - } - } - - return C; -} - -ParCSRMatrix* ParCSRMatrix::mult(ParCSRMatrix* B, bool tap) -{ - if (tap) - { - return this->tap_mult(B); - } - - // Check that communication package has been initialized - if (comm == NULL) - { - comm = new ParComm(partition, off_proc_column_map, on_proc_column_map); - } - - // Initialize C (matrix to be returned) - ParCSRMatrix* C = init_matrix(this, B); - std::vector send_buffer; - - // Communicate data and multiply - comm->init_par_mat_comm(B, send_buffer); - - // Fully Local Computation - CSRMatrix* C_on_on = on_proc->mult((CSRMatrix*) B->on_proc); - CSRMatrix* C_on_off = on_proc->mult((CSRMatrix*) B->off_proc); - - CSRMatrix* recv_mat = comm->complete_mat_comm(); - - mult_helper(B, C, recv_mat, C_on_on, C_on_off); - - delete C_on_on; - delete C_on_off; - delete recv_mat; - - // Return matrix containing product - return C; -} - -ParCSRMatrix* ParCSRMatrix::tap_mult(ParCSRMatrix* B) -{ - // Check that communication package has been initialized - if (tap_mat_comm == NULL) - { - // Always 2-step - tap_mat_comm = new TAPComm(partition, off_proc_column_map, - on_proc_column_map, false); - } - - // Initialize C (matrix to be returned) - ParCSRMatrix* C = init_matrix(this, B);; - std::vector send_buffer; - - // Communicate data and multiply - tap_mat_comm->init_par_mat_comm(B, send_buffer); - - // Fully Local Computation - CSRMatrix* C_on_on = on_proc->mult((CSRMatrix*) B->on_proc); - CSRMatrix* C_on_off = on_proc->mult((CSRMatrix*) B->off_proc); - - CSRMatrix* recv_mat = tap_mat_comm->complete_mat_comm(); - - mult_helper(B, C, recv_mat, C_on_on, C_on_off); - delete C_on_on; - delete 
C_on_off; - delete recv_mat; - - // Return matrix containing product - return C; -} - -ParCSRMatrix* ParCSRMatrix::mult_T(ParCSRMatrix* A, bool tap) -{ - ParCSCMatrix* Acsc = A->to_ParCSC(); - ParCSRMatrix* C = this->mult_T(Acsc, tap); - delete Acsc; - return C; -} - -ParCSRMatrix* ParCSRMatrix::tap_mult_T(ParCSRMatrix* A) -{ - ParCSCMatrix* Acsc = A->to_ParCSC(); - ParCSRMatrix* C = this->tap_mult_T(Acsc); - delete Acsc; - return C; -} - -ParCSRMatrix* ParCSRMatrix::mult_T(ParCSCMatrix* A, bool tap) -{ - if (tap) - { - return this->tap_mult_T(A); - } - - if (A->comm == NULL) - { - A->comm = new ParComm(A->partition, A->off_proc_column_map, A->on_proc_column_map); - } - - // Initialize C (matrix to be returned) - ParCSRMatrix* C = init_matrix(this, A);; - - CSRMatrix* Ctmp = mult_T_partial(A); - std::vector send_buffer; - - A->comm->init_mat_comm_T(send_buffer, Ctmp->idx1, Ctmp->idx2, - Ctmp->vals); - - CSRMatrix* C_on_on = on_proc->mult_T((CSCMatrix*) A->on_proc); - CSRMatrix* C_off_on = off_proc->mult_T((CSCMatrix*) A->on_proc); - - CSRMatrix* recv_mat = A->comm->complete_mat_comm_T(A->on_proc_num_cols); - - mult_T_combine(A, C, recv_mat, C_on_on, C_off_on); - - // Clean up - delete Ctmp; - delete C_on_on; - delete C_off_on; - delete recv_mat; - - // Return matrix containing product - return C; -} - -ParCSRMatrix* ParCSRMatrix::tap_mult_T(ParCSCMatrix* A) -{ - if (A->tap_mat_comm == NULL) - { - A->tap_mat_comm = new TAPComm(A->partition, A->off_proc_column_map, - A->on_proc_column_map, false); - } - - // Initialize C (matrix to be returned) - ParCSRMatrix* C = init_matrix(this, A); - - CSRMatrix* Ctmp = mult_T_partial(A); - std::vector send_buffer; - - A->tap_mat_comm->init_mat_comm_T(send_buffer, Ctmp->idx1, Ctmp->idx2, - Ctmp->vals); - - CSRMatrix* C_on_on = on_proc->mult_T((CSCMatrix*) A->on_proc); - CSRMatrix* C_off_on = off_proc->mult_T((CSCMatrix*) A->on_proc); - - CSRMatrix* recv_mat = A->tap_mat_comm->complete_mat_comm_T(A->on_proc_num_cols); - - 
mult_T_combine(A, C, recv_mat, C_on_on, C_off_on); - - // Clean up - delete Ctmp; - delete recv_mat; - delete C_on_on; - delete C_off_on; - - // Return matrix containing product - return C; -} - -ParMatrix* ParMatrix::mult(ParCSRMatrix* B, bool tap) -{ - int rank; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - if (rank == 0) - printf("Multiplication is not implemented for these ParMatrix types.\n"); - return NULL; -} - -void ParCSRMatrix::mult_helper(ParCSRMatrix* B, ParCSRMatrix* C, - CSRMatrix* recv_mat, CSRMatrix* C_on_on, CSRMatrix* C_on_off) -{ - // Set dimensions of C - C->global_num_rows = global_num_rows; - C->global_num_cols = B->global_num_cols; - C->local_num_rows = local_num_rows; - - C->on_proc_column_map = B->get_on_proc_column_map(); - C->local_row_map = get_local_row_map(); - C->on_proc_num_cols = C->on_proc_column_map.size(); - - // Initialize nnz as 0 (will increment this as nonzeros are added) - C->local_nnz = 0; - - // Declare Variables - int row_start, row_end; - int global_col; - - // Split recv_mat into on and off proc portions - CSRMatrix* recv_on = new CSRMatrix(recv_mat->n_rows, -1); - CSRMatrix* recv_off = new CSRMatrix(recv_mat->n_rows, -1); - - int* part_to_col = B->map_partition_to_local(); - recv_on->idx1[0] = 0; - recv_off->idx1[0] = 0; - for (int i = 0; i < recv_mat->n_rows; i++) - { - row_start = recv_mat->idx1[i]; - row_end = recv_mat->idx1[i+1]; - for (int j = row_start; j < row_end; j++) - { - global_col = recv_mat->idx2[j]; - if (global_col < B->partition->first_local_col || - global_col > B->partition->last_local_col) - { - recv_off->idx2.emplace_back(global_col); - recv_off->vals.emplace_back(recv_mat->vals[j]); - } - else - { - recv_on->idx2.emplace_back(part_to_col[global_col - - B->partition->first_local_col]); - recv_on->vals.emplace_back(recv_mat->vals[j]); - } - } - recv_on->idx1[i+1] = recv_on->idx2.size(); - recv_off->idx1[i+1] = recv_off->idx2.size(); - } - recv_on->nnz = recv_on->idx2.size(); - 
recv_off->nnz = recv_off->idx2.size(); - delete[] part_to_col; - - // Calculate global_to_C and B_to_C column maps - std::map global_to_C; - std::vector B_to_C(B->off_proc_num_cols); - - std::copy(recv_off->idx2.begin(), recv_off->idx2.end(), - std::back_inserter(C->off_proc_column_map)); - for (std::vector::iterator it = B->off_proc_column_map.begin(); - it != B->off_proc_column_map.end(); ++it) - { - C->off_proc_column_map.emplace_back(*it); - } - std::sort(C->off_proc_column_map.begin(), C->off_proc_column_map.end()); - - int prev_col = -1; - C->off_proc_num_cols = 0; - for (std::vector::iterator it = C->off_proc_column_map.begin(); - it != C->off_proc_column_map.end(); ++it) - { - if (*it != prev_col) - { - global_to_C[*it] = C->off_proc_num_cols; - C->off_proc_column_map[C->off_proc_num_cols++] = *it; - prev_col = *it; - } - } - C->off_proc_column_map.resize(C->off_proc_num_cols); - - for (int i = 0; i < B->off_proc_num_cols; i++) - { - global_col = B->off_proc_column_map[i]; - B_to_C[i] = global_to_C[global_col]; - } - for (std::vector::iterator it = recv_off->idx2.begin(); - it != recv_off->idx2.end(); ++it) - { - *it = global_to_C[*it]; - } - - for (std::vector::iterator it = C_on_off->idx2.begin(); - it != C_on_off->idx2.end(); ++it) - { - *it = B_to_C[*it]; - } - C->off_proc_num_cols = C->off_proc_column_map.size(); - recv_on->n_cols = B->on_proc->n_cols; - recv_off->n_cols = C->off_proc_num_cols; - C_on_off->n_cols = C->off_proc_num_cols; - - // Multiply A->off_proc * B->recv_on -> C_off_on - CSRMatrix* C_off_on = off_proc->mult(recv_on); - delete recv_on; - - // Multiply A->off_proc * B->recv_off -> C_off_off - CSRMatrix* C_off_off = off_proc->mult(recv_off); - delete recv_off; - - // Create C->on_proc by adding C_on_on + C_off_on - C_on_on->add_append(C_off_on, (CSRMatrix*) C->on_proc); - delete C_off_on; - - // Create C->off_proc by adding C_off_on + C_off_off - C_on_off->add_append(C_off_off, (CSRMatrix*) C->off_proc); - delete C_off_off; - - 
C->local_nnz = C->on_proc->nnz + C->off_proc->nnz; -} - -CSRMatrix* ParCSRMatrix::mult_T_partial(CSCMatrix* A_off) -{ - CSRMatrix* C_off_on = on_proc->mult_T(A_off, on_proc_column_map.data()); - CSRMatrix* C_off_off = off_proc->mult_T(A_off, off_proc_column_map.data()); - CSRMatrix* Ctmp = C_off_on->add(C_off_off, false); - - delete C_off_on; - delete C_off_off; - - return Ctmp; -} - -// A_T * self -CSRMatrix* ParCSRMatrix::mult_T_partial(ParCSCMatrix* A) -{ - // Declare Variables - return mult_T_partial((CSCMatrix*) A->off_proc); -} - -void ParCSRMatrix::mult_T_combine(ParCSCMatrix* P, ParCSRMatrix* C, CSRMatrix* recv_mat, - CSRMatrix* C_on_on, CSRMatrix* C_off_on) -{ - int start, end, ctr; - int col, col_C; - - std::vector sums; - std::vector next; - - // Split recv_mat into recv_on and recv_off - // Split recv_mat into on and off proc portions - CSRMatrix* recv_on = new CSRMatrix(recv_mat->n_rows, -1); - CSRMatrix* recv_off = new CSRMatrix(recv_mat->n_rows, -1); - for (int i = 0; i < recv_mat->n_rows; i++) - { - start = recv_mat->idx1[i]; - end = recv_mat->idx1[i+1]; - for (int j = start; j < end; j++) - { - col = recv_mat->idx2[j]; - if (col < partition->first_local_col - || col > partition->last_local_col) - { - recv_off->idx2.emplace_back(col); - recv_off->vals.emplace_back(recv_mat->vals[j]); - } - else - { - recv_on->idx2.emplace_back(col); - recv_on->vals.emplace_back(recv_mat->vals[j]); - } - } - recv_on->idx1[i+1] = recv_on->idx2.size(); - recv_off->idx1[i+1] = recv_off->idx2.size(); - } - recv_on->nnz = recv_on->idx2.size(); - recv_off->nnz = recv_off->idx2.size(); - - - // Set dimensions of C - C->global_num_rows = P->global_num_cols; // AT global rows - C->global_num_cols = global_num_cols; - C->local_num_rows = P->on_proc_num_cols; // AT local rows - - // Initialize nnz as 0 (will increment this as nonzeros are added) - C->local_nnz = 0; - - /****************************** - * Form on_proc - ******************************/ - // Resize variables in 
on_proc - C->on_proc_column_map = get_on_proc_column_map(); - C->local_row_map = P->get_on_proc_column_map(); - C->on_proc_num_cols = C->on_proc_column_map.size(); - - // Update recv_on columns (to match local cols) - int* part_to_col = map_partition_to_local(); - for (std::vector::iterator it = recv_on->idx2.begin(); - it != recv_on->idx2.end(); ++it) - { - *it = part_to_col[(*it - partition->first_local_col)]; - } - delete[] part_to_col; - - // Multiply on_proc - recv_on->n_cols = C->on_proc_num_cols; - C_on_on->add_append(recv_on, (CSRMatrix*) C->on_proc); - - /****************************** - * Form off_proc - ******************************/ - // Calculate global_to_C and map_to_C column maps - std::map global_to_C; - std::vector map_to_C; - if (off_proc_num_cols) - { - map_to_C.reserve(off_proc_num_cols); - } - - // Create set of global columns in B_off_proc and recv_mat - std::set C_col_set; - for (std::vector::iterator it = recv_off->idx2.begin(); - it != recv_off->idx2.end(); ++it) - { - C_col_set.insert(*it); - } - for (std::vector::iterator it = off_proc_column_map.begin(); - it != off_proc_column_map.end(); ++it) - { - C_col_set.insert(*it); - } - - C->off_proc_num_cols = C_col_set.size(); - if (C->off_proc_num_cols) - { - C->off_proc_column_map.reserve(C->off_proc_num_cols); - } - for (std::set::iterator it = C_col_set.begin(); - it != C_col_set.end(); ++it) - { - global_to_C[*it] = C->off_proc_column_map.size(); - C->off_proc_column_map.emplace_back(*it); - } - - // Map local off_proc_cols to C->off_proc_column_map - for (std::vector::iterator it = off_proc_column_map.begin(); - it != off_proc_column_map.end(); ++it) - { - col_C = global_to_C[*it]; - map_to_C.emplace_back(col_C); - } - - // Update recvd cols from global_col to local col in C - for (std::vector::iterator it = recv_off->idx2.begin(); - it != recv_off->idx2.end(); ++it) - { - *it = global_to_C[*it]; - } - - recv_off->n_cols = C->off_proc_num_cols; - for (std::vector::iterator it = 
C_off_on->idx2.begin(); - it != C_off_on->idx2.end(); ++it) - { - *it = map_to_C[*it]; - } - C_off_on->add_append(recv_off, (CSRMatrix*) C->off_proc); - - C->local_nnz = C->on_proc->nnz + C->off_proc->nnz; - - // Condense columns! A lot of them are zero columns... - // Could instead add global column indices, and then map to local - std::vector off_col_sizes; - std::vector col_orig_to_new; - if (C->off_proc_num_cols) - { - off_col_sizes.resize(C->off_proc_num_cols, 0); - col_orig_to_new.resize(C->off_proc_num_cols); - } - for (int i = 0; i < C->local_num_rows; i++) - { - start = C->off_proc->idx1[i]; - end = C->off_proc->idx1[i+1]; - for (int j = start; j < end; j++) - { - off_col_sizes[C->off_proc->idx2[j]]++; - } - } - ctr = 0; - for (int i = 0; i < C->off_proc_num_cols; i++) - { - if (off_col_sizes[i]) - { - col_orig_to_new[i] = ctr; - C->off_proc_column_map[ctr++] = C->off_proc_column_map[i]; - } - } - C->off_proc_num_cols = ctr; - C->off_proc->n_cols = ctr; - if (ctr) - { - C->off_proc_column_map.resize(ctr); - } - else - { - C->off_proc_column_map.clear(); - } - for (int i = 0; i < C->local_num_rows; i++) - { - start = C->off_proc->idx1[i]; - end = C->off_proc->idx1[i+1]; - for (int j = start; j < end; j++) - { - col = C->off_proc->idx2[j]; - C->off_proc->idx2[j] = col_orig_to_new[col]; - } - } - - delete recv_on; - delete recv_off; -} - diff --git a/raptor/util/linalg/par_spmv.cpp b/raptor/util/linalg/par_spmv.cpp deleted file mode 100644 index c2596955..00000000 --- a/raptor/util/linalg/par_spmv.cpp +++ /dev/null @@ -1,342 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "raptor/core/types.hpp" -#include "raptor/core/par_matrix.hpp" -#include "raptor/core/par_vector.hpp" - -#include "assert.h" - -using namespace raptor; - -/************************************************************** - ***** Parallel Matrix-Vector Multiplication - 
************************************************************** - ***** Performs parallel matrix-vector multiplication - ***** b = A*x - ***** - ***** Parameters - ***** ------------- - ***** x : ParVector* - ***** Parallel vector to be multiplied - ***** b : ParVector* - ***** Parallel vector result is returned in - **************************************************************/ -void ParMatrix::mult(ParVector& x, ParVector& b, bool tap) -{ - if (tap) - { - this->tap_mult(x, b); - return; - } - - // Check that communication package has been initialized - if (comm == NULL) - { - comm = new ParComm(partition, off_proc_column_map, on_proc_column_map); - } - - // Initialize Isends and Irecvs to communicate - // values of x - comm->init_comm(x, off_proc->b_cols); - - // Multiply the diagonal portion of the matrix, - // setting b = A_diag*x_local - if (local_num_rows) - { - on_proc->mult(x.local, b.local); - } - - // Wait for Isends and Irecvs to complete - std::vector& x_tmp = comm->complete_comm(off_proc->b_cols); - - // Multiply remaining columns, appending to previous - // solution in b (b += A_offd * x_distant) - if (off_proc_num_cols) - { - off_proc->mult_append(x_tmp, b.local); - } -} - -void ParMatrix::tap_mult(ParVector& x, ParVector& b) -{ - // Check that communication package has been initialized - if (tap_comm == NULL) - { - tap_comm = new TAPComm(partition, off_proc_column_map, on_proc_column_map); - } - - // Initialize Isends and Irecvs to communicate - // values of x - tap_comm->init_comm(x, off_proc->b_cols); - - // Multiply the diagonal portion of the matrix, - // setting b = A_diag*x_local - if (local_num_rows) - { - on_proc->mult(x.local, b.local); - } - - // Wait for Isends and Irecvs to complete - std::vector& x_tmp = tap_comm->complete_comm(off_proc->b_cols); - - // Multiply remaining columns, appending to previous - // solution in b (b += A_offd * x_distant) - if (off_proc_num_cols) - { - off_proc->mult_append(x_tmp, b.local); - } -} - -void 
ParMatrix::mult_append(ParVector& x, ParVector& b, bool tap) -{ - if (tap) - { - this->tap_mult_append(x, b); - return; - } - - // Check that communication package has been initialized - if (comm == NULL) - { - comm = new ParComm(partition, off_proc_column_map, on_proc_column_map); - } - - // Initialize Isends and Irecvs to communicate - // values of x - comm->init_comm(x, off_proc->b_cols); - - // Multiply the diagonal portion of the matrix, - // setting b = A_diag*x_local - if (local_num_rows) - { - on_proc->mult_append(x.local, b.local); - } - - // Wait for Isends and Irecvs to complete - std::vector& x_tmp = comm->complete_comm(off_proc->b_cols); - - // Multiply remaining columns, appending to previous - // solution in b (b += A_offd * x_distant) - if (off_proc_num_cols) - { - off_proc->mult_append(x_tmp, b.local); - } -} - -void ParMatrix::tap_mult_append(ParVector& x, ParVector& b) -{ - // Check that communication package has been initialized - if (tap_comm == NULL) - { - tap_comm = new TAPComm(partition, off_proc_column_map, on_proc_column_map); - } - - // Initialize Isends and Irecvs to communicate - // values of x - tap_comm->init_comm(x, off_proc->b_cols); - - // Multiply the diagonal portion of the matrix, - // setting b = A_diag*x_local - if (local_num_rows) - { - on_proc->mult_append(x.local, b.local); - } - - // Wait for Isends and Irecvs to complete - std::vector& x_tmp = tap_comm->complete_comm(off_proc->b_cols); - - // Multiply remaining columns, appending to previous - // solution in b (b += A_offd * x_distant) - if (off_proc_num_cols) - { - off_proc->mult_append(x_tmp, b.local); - } -} - -void ParMatrix::mult_T(ParVector& x, ParVector& b, bool tap) -{ - if (tap) - { - this->tap_mult_T(x, b); - return; - } - - // Check that communication package has been initialized - if (comm == NULL) - { - comm = new ParComm(partition, off_proc_column_map, on_proc_column_map); - } - - std::vector& x_tmp = comm->get_buffer(); - if ((int)x_tmp.size() < 
comm->recv_data->size_msgs * off_proc->b_cols) - x_tmp.resize(comm->recv_data->size_msgs * off_proc->b_cols); - - off_proc->mult_T(x.local, x_tmp); - - comm->init_comm_T(x_tmp, off_proc->b_cols); - - if (local_num_rows) - { - on_proc->mult_T(x.local, b.local); - } - - comm->complete_comm_T(b.local.values, off_proc->b_cols); -} - -void ParMatrix::tap_mult_T(ParVector& x, ParVector& b) -{ - // Check that communication package has been initialized - if (tap_comm == NULL) - { - tap_comm = new TAPComm(partition, off_proc_column_map, on_proc_column_map); - } - - std::vector& x_tmp = tap_comm->get_buffer(); - if ((int)x_tmp.size() < tap_comm->recv_size * off_proc->b_cols) - x_tmp.resize(tap_comm->recv_size * off_proc->b_cols); - - off_proc->mult_T(x.local, x_tmp); - - tap_comm->init_comm_T(x_tmp, off_proc->b_cols); - - if (local_num_rows) - { - on_proc->mult_T(x.local, b.local); - } - - tap_comm->complete_comm_T(b.local.values, off_proc->b_cols); -} - -void ParMatrix::residual(ParVector& x, ParVector& b, ParVector& r, bool tap) -{ - if (tap) - { - this->tap_residual(x, b, r); - return; - } - - // Check that communication package has been initialized - if (comm == NULL) - { - comm = new ParComm(partition, off_proc_column_map, on_proc_column_map); - } - - // Initialize Isends and Irecvs to communicate - // values of x - comm->init_comm(x, off_proc->b_cols); - - std::copy(b.local.values.begin(), b.local.values.end(), - r.local.values.begin()); - - // Multiply the diagonal portion of the matrix, - // setting b = A_diag*x_local - if (local_num_rows && on_proc_num_cols) - { - on_proc->residual(x.local, b.local, r.local); - } - - // Wait for Isends and Irecvs to complete - std::vector& x_tmp = comm->complete_comm(off_proc->b_cols); - - // Multiply remaining columns, appending to previous - // solution in b (b += A_offd * x_distant) - if (off_proc_num_cols) - { - off_proc->mult_append_neg(x_tmp, r.local); - } -} - -void ParMatrix::tap_residual(ParVector& x, ParVector& b, 
ParVector& r) -{ - // Check that communication package has been initialized - if (tap_comm == NULL) - { - tap_comm = new TAPComm(partition, off_proc_column_map, on_proc_column_map); - } - - // Initialize Isends and Irecvs to communicate - // values of x - tap_comm->init_comm(x, off_proc->b_cols); - - std::copy(b.local.values.begin(), b.local.values.end(), r.local.values.begin()); - - // Multiply the diagonal portion of the matrix, - // setting b = A_diag*x_local - if (local_num_rows && on_proc_num_cols) - { - on_proc->mult_append_neg(x.local, r.local); - } - - // Wait for Isends and Irecvs to complete - std::vector& x_tmp = tap_comm->complete_comm(off_proc->b_cols); - - // Multiply remaining columns, appending to previous - // solution in b (b += A_offd * x_distant) - if (off_proc_num_cols) - { - off_proc->mult_append_neg(x_tmp, r.local); - } -} - - -void ParCOOMatrix::mult(ParVector& x, ParVector& b, bool tap) -{ - ParMatrix::mult(x, b, tap); -} - -void ParCSRMatrix::mult(ParVector& x, ParVector& b, bool tap) -{ - ParMatrix::mult(x, b, tap); -} - -void ParCSCMatrix::mult(ParVector& x, ParVector& b, bool tap) -{ - ParMatrix::mult(x, b, tap); -} - -void ParCOOMatrix::tap_mult(ParVector& x, ParVector& b) -{ - ParMatrix::tap_mult(x, b); -} - -void ParCSRMatrix::tap_mult(ParVector& x, ParVector& b) -{ - ParMatrix::tap_mult(x, b); -} - -void ParCSCMatrix::tap_mult(ParVector& x, ParVector& b) -{ - ParMatrix::tap_mult(x, b); -} - -void ParCOOMatrix::mult_T(ParVector& x, ParVector& b, bool tap) -{ - ParMatrix::mult_T(x, b, tap); -} - -void ParCSRMatrix::mult_T(ParVector& x, ParVector& b, bool tap) -{ - ParMatrix::mult_T(x, b, tap); -} - -void ParCSCMatrix::mult_T(ParVector& x, ParVector& b, bool tap) -{ - ParMatrix::mult_T(x, b, tap); -} - -void ParCOOMatrix::tap_mult_T(ParVector& x, ParVector& b) -{ - ParMatrix::tap_mult_T(x, b); -} - -void ParCSRMatrix::tap_mult_T(ParVector& x, ParVector& b) -{ - ParMatrix::tap_mult_T(x, b); -} - -void ParCSCMatrix::tap_mult_T(ParVector& 
x, ParVector& b) -{ - ParMatrix::tap_mult_T(x, b); -} - diff --git a/raptor/util/linalg/repartition.cpp b/raptor/util/linalg/repartition.cpp deleted file mode 100644 index 82312331..00000000 --- a/raptor/util/linalg/repartition.cpp +++ /dev/null @@ -1,392 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -#include "repartition.hpp" - -namespace raptor { -void make_contiguous(ParCSRMatrix* A, std::vector& off_proc_part_map) -{ - int rank, num_procs; - RAPtor_MPI_Comm_rank(RAPtor_MPI_COMM_WORLD, &rank); - RAPtor_MPI_Comm_size(RAPtor_MPI_COMM_WORLD, &num_procs); - - std::map global_to_local; - std::vector proc_num_cols(num_procs); - std::vector recvvec; - - int ctr = 0; - for (std::vector::const_iterator it = A->off_proc_column_map.begin(); - it != A->off_proc_column_map.end(); ++it) - { - global_to_local[*it] = ctr++; - } - - // Find how many columns are local to each process - RAPtor_MPI_Allgather(&(A->on_proc_num_cols), 1, RAPtor_MPI_INT, proc_num_cols.data(), 1, RAPtor_MPI_INT, - RAPtor_MPI_COMM_WORLD); - - // Determine the new first local row / first local col of rank - A->partition->first_local_col = 0; - for (int i = 0; i < rank; i++) - { - A->partition->first_local_col += proc_num_cols[i]; - } - A->partition->first_local_row = A->partition->first_local_col; - - // Determine the global number of columns and rows - A->global_num_cols = A->partition->first_local_col; - for (int i = rank; i < num_procs; i++) - { - A->global_num_cols += proc_num_cols[i]; - } - A->global_num_rows = A->global_num_cols; - - A->comm = new ParComm(A->partition->topology, A->off_proc_column_map, - off_proc_part_map, A->local_row_map); - - for (int i = 0; i < A->local_num_rows; i++) - { - A->on_proc_column_map[i] = A->partition->first_local_col + i; - } - A->local_row_map = A->get_on_proc_column_map(); - recvvec = A->comm->communicate(A->local_row_map); - for (int i = 0; i < A->off_proc_num_cols; i++) - 
A->off_proc_column_map[i] = recvvec[i]; - - - // Sort rows, removing duplicate entries and moving diagonal - // value to first - A->on_proc->sort(); - A->on_proc->move_diag(); - A->off_proc->sort(); - -} - -ParCSRMatrix* repartition_matrix(ParCSRMatrix* A, int* partition, std::vector& new_local_rows) -{ - int rank, num_procs; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &num_procs); - - - ParCSRMatrix* A_part = NULL; - - int proc, start, end; - int row_start, row_end, row_size; - int num_sends, num_recvs; - int proc_idx, idx, ctr, prev_ctr; - int row, col, global_row, global_col; - int count, first_row; - int recv_size; - double val; - std::vector proc_rows(num_procs, 0); - std::vector proc_to_idx(num_procs); - std::vector send_procs(num_procs); - std::vector send_ptr; - std::vector send_requests; - std::vector send_indices; - std::vector send_buffer; - std::vector recv_buffer; - std::vector recv_rows; - std::vector recv_row_ptr; - std::vector recv_cols; - std::vector recv_vals; - MPI_Status recv_status; - - int num_ints = 2*A->local_num_rows + A->local_nnz; - int num_dbls = A->local_nnz; - int int_bytes, dbl_bytes; - MPI_Pack_size(num_ints, MPI_INT, MPI_COMM_WORLD, &int_bytes); - MPI_Pack_size(num_dbls, MPI_DOUBLE, MPI_COMM_WORLD, &dbl_bytes); - - int tag = 29485; - - std::vector off_parts(A->off_proc_num_cols); - std::vector& recvvec = A->comm->communicate(partition); - std::copy(recvvec.begin(), recvvec.end(), off_parts.begin()); - - num_sends = 0; - for (int i = 0; i < A->local_num_rows; i++) - { - proc = partition[i]; - if (proc_rows[proc] == 0) - { - send_procs[num_sends++] = proc; - } - proc_rows[proc]++; - } - send_procs.resize(num_sends); - - - send_ptr.resize(num_sends+1); - send_requests.resize(num_sends); - send_ptr[0] = 0; - for (int i = 0; i < num_sends; i++) - { - proc = send_procs[i]; - proc_to_idx[proc] = i; - send_ptr[i+1] = send_ptr[i] + proc_rows[proc]; - proc_rows[proc] = 0; - } - - 
send_indices.resize(A->local_num_rows); - for (int i = 0; i < A->local_num_rows; i++) - { - proc = partition[i]; - proc_idx = proc_to_idx[proc]; - idx = send_ptr[proc_idx] + proc_rows[proc]++; - send_indices[idx] = i; - } - for (int i = 0; i < num_sends; i++) - { - proc = send_procs[i]; - proc_rows[proc] = 1; - } - MPI_Allreduce(MPI_IN_PLACE, proc_rows.data(), num_procs, MPI_INT, MPI_SUM, - MPI_COMM_WORLD); - num_recvs = proc_rows[rank]; - - // TODO -- send partitions for each global col (both on and off proc) if part[row] != part[col] - std::vector col_bool(A->local_num_rows, 0); - std::vector off_col_bool(A->off_proc_num_cols, 0); - std::vector send_cols(A->local_num_rows); - std::vector off_send_cols(A->off_proc_num_cols); - int n_cols, off_n_cols; - int n_rows, part; - int off_col_size = 2 * (A->off_proc_num_cols + A->local_num_rows) * num_sends; - int off_col_bytes, row_bytes; - MPI_Pack_size(off_col_size, MPI_INT, MPI_COMM_WORLD, &off_col_bytes); - MPI_Pack_size(num_sends, MPI_INT, MPI_COMM_WORLD, &row_bytes); - send_buffer.resize(int_bytes + dbl_bytes + off_col_bytes + row_bytes); - ctr = 0; - for (int i = 0; i < num_sends; i++) - { - prev_ctr = ctr; - proc = send_procs[i]; - start = send_ptr[i]; - end = send_ptr[i+1]; - n_rows = end - start; - proc_rows[proc] = n_rows; - MPI_Pack(&n_rows, 1, MPI_INT, send_buffer.data(), send_buffer.size(), - &ctr, MPI_COMM_WORLD); - n_cols = 0; - off_n_cols = 0; - for (int j = start; j < end; j++) - { - row = send_indices[j]; - global_row = A->local_row_map[row]; - row_size = A->on_proc->idx1[row+1] - A->on_proc->idx1[row] - + A->off_proc->idx1[row+1] - A->off_proc->idx1[row]; - - MPI_Pack(&global_row, 1, MPI_INT, send_buffer.data(), send_buffer.size(), - &ctr, MPI_COMM_WORLD); - MPI_Pack(&row_size, 1, MPI_INT, send_buffer.data(), send_buffer.size(), - &ctr, MPI_COMM_WORLD); - - row_start = A->on_proc->idx1[row]; - row_end = A->on_proc->idx1[row+1]; - for (int k = row_start; k < row_end; k++) - { - col = 
A->on_proc->idx2[k]; - global_col = A->on_proc_column_map[col]; - val = A->on_proc->vals[k]; - MPI_Pack(&global_col, 1, MPI_INT, send_buffer.data(), send_buffer.size(), - &ctr, MPI_COMM_WORLD); - MPI_Pack(&val, 1, MPI_DOUBLE, send_buffer.data(), send_buffer.size(), - &ctr, MPI_COMM_WORLD); - if (partition[col] != proc && col_bool[col] == 0) - { - send_cols[n_cols++] = col; - col_bool[col] = 1; - } - } - row_start = A->off_proc->idx1[row]; - row_end = A->off_proc->idx1[row+1]; - for (int k = row_start; k < row_end; k++) - { - col = A->off_proc->idx2[k]; - global_col = A->off_proc_column_map[col]; - val = A->off_proc->vals[k]; - MPI_Pack(&global_col, 1, MPI_INT, send_buffer.data(), send_buffer.size(), - &ctr, MPI_COMM_WORLD); - MPI_Pack(&val, 1, MPI_DOUBLE, send_buffer.data(), send_buffer.size(), - &ctr, MPI_COMM_WORLD); - if (off_parts[col] != proc && off_col_bool[col] == 0) - { - off_send_cols[off_n_cols++] = col; - off_col_bool[col] = 1; - } - } - } - for (int j = 0; j < n_cols; j++) - { - col = send_cols[j]; - col_bool[col] = 0; - global_col = A->local_row_map[col]; - part = partition[col]; - MPI_Pack(&global_col, 1, MPI_INT, send_buffer.data(), send_buffer.size(), - &ctr, MPI_COMM_WORLD); - MPI_Pack(&part, 1, MPI_INT, send_buffer.data(), send_buffer.size(), &ctr, - MPI_COMM_WORLD); - } - for (int j = 0; j < off_n_cols; j++) - { - col = off_send_cols[j]; - off_col_bool[col] = 0; - global_col = A->off_proc_column_map[col]; - part = off_parts[col]; - MPI_Pack(&global_col, 1, MPI_INT, send_buffer.data(), send_buffer.size(), - &ctr, MPI_COMM_WORLD); - MPI_Pack(&part, 1, MPI_INT, send_buffer.data(), send_buffer.size(), &ctr, - MPI_COMM_WORLD); - } - MPI_Isend(&(send_buffer[prev_ctr]), ctr - prev_ctr, MPI_PACKED, proc, tag, - MPI_COMM_WORLD, &(send_requests[i])); - } - - std::map off_proc_to_local; - std::vector off_col_to_global; - std::vector off_col_parts; - recv_size = 0; - recv_row_ptr.push_back(recv_size); - for (int i = 0; i < num_recvs; i++) - { - 
MPI_Probe(MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &recv_status); - proc = recv_status.MPI_SOURCE; - MPI_Get_count(&recv_status, MPI_PACKED, &count); - recv_buffer.resize(count); - MPI_Recv(recv_buffer.data(), count, MPI_PACKED, proc, tag, MPI_COMM_WORLD, - &recv_status); - - ctr = 0; - - MPI_Unpack(recv_buffer.data(), count, &ctr, &n_rows, 1, MPI_INT, - MPI_COMM_WORLD); - for (int j = 0; j < n_rows; j++) - { - MPI_Unpack(recv_buffer.data(), count, &ctr, &global_row, 1, MPI_INT, - MPI_COMM_WORLD); - recv_rows.push_back(global_row); - MPI_Unpack(recv_buffer.data(), count, &ctr, &row_size, 1, MPI_INT, - MPI_COMM_WORLD); - recv_size += row_size; - recv_row_ptr.push_back(recv_size); - for (int k = 0; k < row_size; k++) - { - MPI_Unpack(recv_buffer.data(), count, &ctr, &global_col, 1, MPI_INT, - MPI_COMM_WORLD); - recv_cols.push_back(global_col); - MPI_Unpack(recv_buffer.data(), count, &ctr, &val, 1, MPI_DOUBLE, - MPI_COMM_WORLD); - recv_vals.push_back(val); - } - } - while (ctr < count) - { - MPI_Unpack(recv_buffer.data(), count, &ctr, &global_col, 1, MPI_INT, - MPI_COMM_WORLD); - MPI_Unpack(recv_buffer.data(), count, &ctr, &part, 1, MPI_INT, MPI_COMM_WORLD); - if (off_proc_to_local.find(global_col) == off_proc_to_local.end()) - { - off_proc_to_local[global_col] = off_col_to_global.size(); - off_col_to_global.push_back(global_col); - off_col_parts.push_back(part); - } - - } - } - - int num_rows = recv_rows.size(); - MPI_Waitall(num_sends, send_requests.data(), MPI_STATUSES_IGNORE); - - MPI_Allgather(&num_rows, 1, MPI_INT, proc_rows.data(), 1, MPI_INT, MPI_COMM_WORLD); - first_row = 0; - for (int i = 0; i < rank; i++) - { - first_row += proc_rows[i]; - } - - A_part = new ParCSRMatrix(A->global_num_rows, A->global_num_rows, num_rows, num_rows, - first_row, first_row, A->partition->topology); - A_part->off_proc_num_cols = off_col_parts.size(); - A_part->off_proc_column_map.resize(A_part->off_proc_num_cols); - A_part->on_proc_column_map.resize(A_part->local_num_rows); - - 
std::vector off_proc_part_map(A_part->off_proc_num_cols); - std::vector off_col_order(A_part->off_proc_num_cols); - std::iota(off_col_order.begin(), off_col_order.end(), 0); - std::sort(off_col_order.begin(), off_col_order.end(), - [&](const int i, const int j) - { - if (off_col_parts[i] == off_col_parts[j]) - return off_col_to_global[i] < off_col_to_global[j]; - return off_col_parts[i] < off_col_parts[j]; - }); - for (int i = 0; i < A_part->off_proc_num_cols; i++) - { - col = off_col_order[i]; - global_col = off_col_to_global[col]; - off_proc_to_local[global_col] = i; - A_part->off_proc_column_map[i] = global_col; - off_proc_part_map[i] = off_col_parts[col]; - } - - // Create row_ptr - // Add values/indices to appropriate positions - std::vector row_order(A_part->local_num_rows); - std::iota(row_order.begin(), row_order.end(), 0); - std::sort(row_order.begin(), row_order.end(), - [&](const int i, const int j) - { - return recv_rows[i] < recv_rows[j]; - }); - std::map on_proc_to_local; - for(int i = 0; i < num_rows; i++) - { - row = row_order[i]; - global_row = recv_rows[row]; - on_proc_to_local[global_row] = i; - A_part->on_proc_column_map[i] = global_row; - } - A_part->local_row_map = A_part->get_on_proc_column_map(); - A_part->on_proc_num_cols = A_part->on_proc_column_map.size(); - - A_part->on_proc->idx1[0] = 0; - A_part->off_proc->idx1[0] = 0; - for (int i = 0; i < num_rows; i++) - { - row = row_order[i]; - row_start = recv_row_ptr[row]; - row_end = recv_row_ptr[row+1]; - for (int j = row_start; j < row_end; j++) - { - col = recv_cols[j]; - val = recv_vals[j]; - - if (on_proc_to_local.find(col) != on_proc_to_local.end()) - { - A_part->on_proc->idx2.push_back(on_proc_to_local[col]); - A_part->on_proc->vals.push_back(val); - } - else - { - A_part->off_proc->idx2.push_back(off_proc_to_local[col]); - A_part->off_proc->vals.push_back(val); - } - } - A_part->on_proc->idx1[i+1] = A_part->on_proc->idx2.size(); - A_part->off_proc->idx1[i+1] = 
A_part->off_proc->idx2.size(); - } - A_part->on_proc->nnz = A_part->on_proc->idx2.size(); - A_part->off_proc->nnz = A_part->off_proc->idx2.size(); - A_part->local_nnz = A_part->on_proc->nnz + A_part->off_proc->nnz; - - new_local_rows.resize(A_part->on_proc_num_cols); - std::copy(A_part->on_proc_column_map.begin(), A_part->on_proc_column_map.end(), - new_local_rows.begin()); - - make_contiguous(A_part, off_proc_part_map); - - return A_part; -} - -} diff --git a/raptor/util/linalg/repartition.hpp b/raptor/util/linalg/repartition.hpp deleted file mode 100644 index 2bd704af..00000000 --- a/raptor/util/linalg/repartition.hpp +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause -// -#ifndef RAPTOR_GALLERY_REPARTITION_HPP -#define RAPTOR_GALLERY_REPARTITION_HPP - -#include -#include -#include -#include - -#include "raptor/core/types.hpp" -#include "raptor/core/mpi_types.hpp" -#include "raptor/core/par_matrix.hpp" - -namespace raptor { - -ParCSRMatrix* repartition_matrix(ParCSRMatrix* A, int* partition, std::vector& new_local_rows); -void make_contiguous(ParCSRMatrix* A); - -} -#endif diff --git a/raptor/util/linalg/spmv.cpp b/raptor/util/linalg/spmv.cpp deleted file mode 100644 index cefd8f72..00000000 --- a/raptor/util/linalg/spmv.cpp +++ /dev/null @@ -1,437 +0,0 @@ -// Copyright (c) 2015-2017, RAPtor Developer Team -// License: Simplified BSD, http://opensource.org/licenses/BSD-2-Clause - -#include "raptor/core/matrix.hpp" - -using namespace raptor; - -// Declare Private Methods -void CSR_spmv(const CSRMatrix* A, const double* x, double* b); -void CSR_residual(const CSRMatrix* A, const double* x, - const double* b, double* r); -void CSR_append(const CSRMatrix* A, const double* x, double* b); -void BSR_spmv(const BSRMatrix* A, const double* x, double* b); - -// COOMatrix SpMV Methods (or BCOO) -template -void COO_append(const COOMatrix* A, const std::vector& vals, - 
const double* x, double* b) -{ - for (int i = 0; i < A->nnz; i++) - { - A->append(A->idx1[i], A->idx2[i], b, x, vals[i]); - } -} -template -void COO_append_T(const COOMatrix* A, const std::vector& vals, - const double* x, double* b) -{ - for (int i = 0; i < A->nnz; i++) - { - A->append_T(A->idx2[i], A->idx1[i], b, x, vals[i]); - } -} -template -void COO_append_neg(const COOMatrix* A, const std::vector& vals, - const double* x, double* b) -{ - for (int i = 0; i < A->nnz; i++) - { - A->append_neg(A->idx1[i], A->idx2[i], b, x, vals[i]); - } -} -template -void COO_append_neg_T(const COOMatrix* A, const std::vector& vals, - const double* x, double* b) -{ - for (int i = 0; i < A->nnz; i++) - { - A->append_neg_T(A->idx1[i], A->idx2[i], b, x, vals[i]); - } -} - - - - - -// CSRMatrix SpMV Methods (or BSR) -// Optimized CSR and BSR standard SpMVs -void CSR_spmv(const CSRMatrix* A, const double* x, double* b) -{ - int start, end; - double val; - for (int i = 0; i < A->n_rows; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - val = 0; - for (int j = start; j < end; j++) - { - val += A->vals[j] * x[A->idx2[j]]; - } - b[i] = val; - } -} - -void CSR_residual(const CSRMatrix* A, const double* x, - const double* b, double* r) -{ - int start, end; - double val; - for (int i = 0; i < A->n_rows; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - val = b[i]; - for (int j = start; j < end; j++) - { - val -= A->vals[j] * x[A->idx2[j]]; - } - r[i] = val; - } -} - - -void CSR_append(const CSRMatrix* A, const double* x, double* b) -{ - int start, end; - double val; - for (int i = 0; i < A->n_rows; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - val = 0; - for (int j = start; j < end; j++) - { - val += A->vals[j] * x[A->idx2[j]]; - } - b[i] += val; - } -} - -template -void BSR_append(const CSRMatrix* A, const std::vector& vals, - const double* x, double* b) -{ - int start, end; - for (int i = 0; i < A->n_rows; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - for (int j 
= start; j < end; j++) - { - A->append(i, A->idx2[j], b, x, vals[j]); - } - } -} - -void BSR_spmv(const BSRMatrix* A, const double* x, double* b) -{ - int start, end, idx; - int first_row, first_col; - double val; - double* block_val; - for (int i = 0; i < A->n_rows; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - first_row = i*A->b_rows; - for (int row = 0; row < A->b_rows; row++) - { - val = 0; - idx = row * A->b_cols; - for (int j = start; j < end; j++) - { - first_col = A->idx2[j]*A->b_cols; - block_val = A->block_vals[j]; - for (int col = 0; col < A->b_cols; col++) - { - val += (block_val[idx + col] * x[first_col + col]); - } - } - b[first_row + row] = val; - } - } -} -template -void CSR_append_T(const CSRMatrix* A, const std::vector& vals, - const double* x, double* b) -{ - int start, end; - for (int i = 0; i < A->n_rows; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - for (int j = start; j < end; j++) - { - A->append_T(i, A->idx2[j], b, x, vals[j]); - } - } -} -template -void CSR_append_neg(const CSRMatrix* A, const std::vector& vals, - const double* x, double* b) -{ - int start, end; - for (int i = 0; i < A->n_rows; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - for (int j = start; j < end; j++) - { - A->append_neg(i, A->idx2[j], b, x, vals[j]); - } - } -} -template -void CSR_append_neg_T(const CSRMatrix* A, const std::vector& vals, - const double* x, double* b) -{ - int start, end; - for (int i = 0; i < A->n_rows; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - for (int j = start; j < end; j++) - { - A->append_neg_T(i, A->idx2[j], b, x, vals[j]); - } - } -} - - - -// CSCMatrix SpMV Methods (or BSC) -template -void CSC_append(const CSCMatrix* A, const std::vector& vals, - const double* x, double* b) -{ - int start, end; - for (int i = 0; i < A->n_cols; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - for (int j = start; j < end; j++) - { - A->append(A->idx2[j], i, b, x, vals[j]); - } - } -} -template -void 
CSC_append_T(const CSCMatrix* A, const std::vector& vals, - const double* x, double* b) -{ - int start, end; - for (int i = 0; i < A->n_cols; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - for (int j = start; j < end; j++) - { - A->append_T(A->idx2[j], i, b, x, vals[j]); - } - } -} -template -void CSC_append_neg(const CSCMatrix* A, const std::vector& vals, - const double* x, double* b) -{ - int start, end; - for (int i = 0; i < A->n_cols; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - for (int j = start; j < end; j++) - { - A->append_neg(A->idx2[j], i, b, x, vals[j]); - } - } -} -template -void CSC_append_neg_T(const CSCMatrix* A, const std::vector& vals, - const double* x, double* b) -{ - int start, end; - for (int i = 0; i < A->n_cols; i++) - { - start = A->idx1[i]; - end = A->idx1[i+1]; - for (int j = start; j < end; j++) - { - A->append_neg_T(A->idx2[j], i, b, x, vals[j]); - } - } -} - - -void COOMatrix::spmv(const double* x, double* b) const -{ - for (int i = 0; i < n_rows; i++) - b[i] = 0; - COO_append(this, vals, x, b); -} -void COOMatrix::spmv_append(const double* x, double* b) const -{ - COO_append(this, vals, x, b); -} -void COOMatrix::spmv_append_T(const double* x, double* b) const -{ - COO_append_T(this, vals, x, b); -} -void COOMatrix::spmv_append_neg(const double* x, double* b) const -{ - COO_append_neg(this, vals, x, b); -} -void COOMatrix::spmv_append_neg_T(const double* x, double* b) const -{ - COO_append_neg_T(this, vals, x, b); -} -void COOMatrix::spmv_residual(const double* x, const double* b, double* r) const -{ - for (int i = 0; i < n_rows; i++) - r[i] = b[i]; - COO_append_neg(this, vals, x, r); -} -void BCOOMatrix::spmv(const double* x, double* b) const -{ - for (int i = 0; i < n_rows * b_rows; i++) - b[i] = 0; - COO_append(this, block_vals, x, b); -} -void BCOOMatrix::spmv_append(const double* x,double* b) const -{ - COO_append(this, block_vals, x, b); -} -void BCOOMatrix::spmv_append_T(const double* x,double* b) const -{ - 
COO_append_T(this, block_vals, x, b); -} -void BCOOMatrix::spmv_append_neg(const double* x,double* b) const -{ - COO_append_neg(this, block_vals, x, b); -} -void BCOOMatrix::spmv_append_neg_T(const double* x,double* b) const -{ - COO_append_neg_T(this, block_vals, x, b); -} -void BCOOMatrix::spmv_residual(const double* x, const double* b, double* r) const -{ - for (int i = 0; i < n_rows * b_rows; i++) - r[i] = b[i]; - COO_append_neg(this, block_vals, x, r); -} - - - -void CSRMatrix::spmv(const double* x, double* b) const -{ - CSR_spmv(this, x, b); -} -void CSRMatrix::spmv_append(const double* x, double* b) const -{ - CSR_append(this, x, b); -} -void CSRMatrix::spmv_append_T(const double* x, double* b) const -{ - CSR_append_T(this, vals, x, b); -} -void CSRMatrix::spmv_append_neg(const double* x, double* b) const -{ - CSR_append_neg(this, vals, x, b); -} -void CSRMatrix::spmv_append_neg_T(const double* x, double* b) const -{ - CSR_append_neg_T(this, vals, x, b); -} -void CSRMatrix::spmv_residual(const double* x, const double* b, double* r) const -{ - CSR_residual(this, x, b, r); -} -void BSRMatrix::spmv(const double* x, double* b) const -{ - BSR_spmv(this, x, b); -} -void BSRMatrix::spmv_append(const double* x,double* b) const -{ - BSR_append(this, block_vals, x, b); -} -void BSRMatrix::spmv_append_T(const double* x,double* b) const -{ - CSR_append_T(this, block_vals, x, b); -} -void BSRMatrix::spmv_append_neg(const double* x,double* b) const -{ - CSR_append_neg(this, block_vals, x, b); -} -void BSRMatrix::spmv_append_neg_T(const double* x,double* b) const -{ - CSR_append_neg_T(this, block_vals, x, b); -} -void BSRMatrix::spmv_residual(const double* x, const double* b, double* r) const -{ - for (int i = 0; i < n_rows * b_rows; i++) - r[i] = b[i]; - CSR_append_neg(this, block_vals, x, r); -} - - - -void CSCMatrix::spmv(const double* x, double* b) const -{ - for (int i = 0; i < n_rows; i++) - b[i] = 0; - CSC_append(this, vals, x, b); -} -void 
CSCMatrix::spmv_append(const double* x, double* b) const -{ - CSC_append(this, vals, x, b); -} -void CSCMatrix::spmv_append_T(const double* x, double* b) const -{ - CSC_append_T(this, vals, x, b); -} -void CSCMatrix::spmv_append_neg(const double* x, double* b) const -{ - CSC_append_neg(this, vals, x, b); -} -void CSCMatrix::spmv_append_neg_T(const double* x, double* b) const -{ - CSC_append_neg_T(this, vals, x, b); -} -void CSCMatrix::spmv_residual(const double* x, const double* b, double* r) const -{ - for (int i = 0; i < n_rows; i++) - r[i] = b[i]; - CSC_append_neg(this, vals, x, r); -} -void BSCMatrix::spmv(const double* x, double* b) const -{ - for (int i = 0; i < n_rows * b_rows; i++) - b[i] = 0; - CSC_append(this, block_vals, x, b); -} -void BSCMatrix::spmv_append(const double* x,double* b) const -{ - CSC_append(this, block_vals, x, b); -} -void BSCMatrix::spmv_append_T(const double* x,double* b) const -{ - CSC_append_T(this, block_vals, x, b); -} -void BSCMatrix::spmv_append_neg(const double* x,double* b) const -{ - CSC_append_neg(this, block_vals, x, b); -} -void BSCMatrix::spmv_append_neg_T(const double* x,double* b) const -{ - CSC_append_neg_T(this, block_vals, x, b); -} -void BSCMatrix::spmv_residual(const double* x, const double* b, double* r) const -{ - for (int i = 0; i < n_rows * b_rows; i++) - r[i] = b[i]; - CSC_append_neg(this, block_vals, x, r); -} - - - diff --git a/raptor/util/tests/CMakeLists.txt b/raptor/util/tests/CMakeLists.txt deleted file mode 100644 index d78f8224..00000000 --- a/raptor/util/tests/CMakeLists.txt +++ /dev/null @@ -1,155 +0,0 @@ -add_executable(test_spmv_laplacian test_spmv_laplacian.cpp) -target_link_libraries(test_spmv_laplacian raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(LaplacianSpMVTest ./test_spmv_laplacian) - -add_executable(test_spmv_aniso test_spmv_aniso.cpp) -target_link_libraries(test_spmv_aniso raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(AnisoSpMVTest ./test_spmv_aniso) - 
-add_executable(test_spmv_random test_spmv_random.cpp) -target_link_libraries(test_spmv_random raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(RandomSpMVTest ./test_spmv_random) - -add_executable(test_bsr_spmv_laplacian test_bsr_spmv_laplacian.cpp) -target_link_libraries(test_bsr_spmv_laplacian raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(BSRLaplacianSpMVTest ./test_bsr_spmv_laplacian) - -add_executable(test_bsr_spmv_aniso test_bsr_spmv_aniso.cpp) -target_link_libraries(test_bsr_spmv_aniso raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(BSRAnisoSpMVTest ./test_bsr_spmv_aniso) - -add_executable(test_bsr_spmv_random test_bsr_spmv_random.cpp) -target_link_libraries(test_bsr_spmv_random raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(BSRRandomSpMVTest ./test_bsr_spmv_random) - - -add_executable(test_jacobi_aniso test_jacobi_aniso.cpp) -target_link_libraries(test_jacobi_aniso raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(AnisoJacobiTest ./test_jacobi_aniso) - -add_executable(test_jacobi_laplacian test_jacobi_laplacian.cpp) -target_link_libraries(test_jacobi_laplacian raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(LaplaceJacobiTest ./test_jacobi_laplacian) - -add_executable(test_gs_aniso test_gs_aniso.cpp) -target_link_libraries(test_gs_aniso raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(AnisoGSTest ./test_gs_aniso) - -add_executable(test_gs_laplacian test_gs_laplacian.cpp) -target_link_libraries(test_gs_laplacian raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(LaplaceGSTest ./test_gs_laplacian) - -# CANNOT CURRENTLY RUN THESE TESTS, BUT RAPTOR SEEMS CORRECT -# TODO : UNCOMMENT WHEN PYAMG BUG IS FIXED -# -#add_executable(test_sor_aniso test_sor_aniso.cpp) -#target_link_libraries(test_sor_aniso raptor ${MPI_LIBRARIES} googletest pthread ) -#add_test(AnisoSORTest ./test_sor_aniso) -# -#add_executable(test_sor_laplacian test_sor_laplacian.cpp) -#target_link_libraries(test_sor_laplacian raptor ${MPI_LIBRARIES} 
googletest pthread ) -#add_test(LaplaceSORTest ./test_sor_laplacian) - -#add_executable(test_bsr_jacobi_aniso test_bsr_jacobi_aniso.cpp) -#target_link_libraries(test_bsr_jacobi_aniso raptor ${MPI_LIBRARIES} googletest pthread ) -#add_test(BSRAnisoJacobiTest ./test_bsr_jacobi_aniso) - -add_executable(test_bsr_jacobi_aniso test_bsr_jacobi_aniso.cpp) -target_link_libraries(test_bsr_jacobi_aniso raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(BSRAnisoJacobiTest ./test_bsr_jacobi_aniso) - -add_executable(test_bsr_jacobi_laplacian test_bsr_jacobi_laplacian.cpp) -target_link_libraries(test_bsr_jacobi_laplacian raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(BSRLaplaceJacobiTest ./test_bsr_jacobi_laplacian) - -add_executable(test_bsr_gs_aniso test_bsr_gs_aniso.cpp) -target_link_libraries(test_bsr_gs_aniso raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(BSRAnisoGSTest ./test_bsr_gs_aniso) - -add_executable(test_bsr_gs_laplacian test_bsr_gs_laplacian.cpp) -target_link_libraries(test_bsr_gs_laplacian raptor ${MPI_LIBRARIES} googletest pthread ) -add_test(BSRLaplaceGSTest ./test_bsr_gs_laplacian) - -if (WITH_MPI) - add_executable(test_par_add test_par_add.cpp) - target_link_libraries(test_par_add raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(ParAddTest ${MPIRUN} -n 1 ${HOST} ./test_par_add) - add_test(ParAddTest ${MPIRUN} -n 16 ${HOST} ./test_par_add) - - add_executable(test_par_spmv_laplacian test_par_spmv_laplacian.cpp) - target_link_libraries(test_par_spmv_laplacian raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(ParLaplacianSpMVTest ${MPIRUN} -n 1 ${HOST} ./test_par_spmv_laplacian) - add_test(ParLaplacianSpMVTest ${MPIRUN} -n 2 ${HOST} ./test_par_spmv_laplacian) - add_test(ParLaplacianSpMVTest ${MPIRUN} -n 4 ${HOST} ./test_par_spmv_laplacian) - add_test(ParLaplacianSpMVTest ${MPIRUN} -n 8 ${HOST} ./test_par_spmv_laplacian) - add_test(ParLaplacianSpMVTest ${MPIRUN} -n 16 ${HOST} ./test_par_spmv_laplacian) - - 
add_executable(test_par_spmv_aniso test_par_spmv_aniso.cpp) - target_link_libraries(test_par_spmv_aniso raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(ParAnisoSpMVTest ${MPIRUN} -n 1 ${HOST} ./test_par_spmv_aniso) - add_test(ParAnisoSpMVTest ${MPIRUN} -n 2 ${HOST} ./test_par_spmv_aniso) - add_test(ParAnisoSpMVTest ${MPIRUN} -n 4 ${HOST} ./test_par_spmv_aniso) - add_test(ParAnisoSpMVTest ${MPIRUN} -n 8 ${HOST} ./test_par_spmv_aniso) - add_test(ParAnisoSpMVTest ${MPIRUN} -n 16 ${HOST} ./test_par_spmv_aniso) - - add_executable(test_par_spmv_random test_par_spmv_random.cpp) - target_link_libraries(test_par_spmv_random ${MPI_LIBRARIES} raptor googletest pthread ) - add_test(ParRandomSpMVTest ${MPIRUN} -n 1 ${HOST} ./test_par_spmv_random) - add_test(ParRandomSpMVTest ${MPIRUN} -n 2 ${HOST} ./test_par_spmv_random) - add_test(ParRandomSpMVTest ${MPIRUN} -n 4 ${HOST} ./test_par_spmv_random) - add_test(ParRandomSpMVTest ${MPIRUN} -n 8 ${HOST} ./test_par_spmv_random) - add_test(ParRandomSpMVTest ${MPIRUN} -n 16 ${HOST} ./test_par_spmv_random) - - add_executable(test_tap_spmv_laplacian test_tap_spmv_laplacian.cpp) - target_link_libraries(test_tap_spmv_laplacian raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(TAPLaplacianSpMVTest ${MPIRUN} -n 2 ${HOST} ./test_tap_spmv_laplacian) - add_test(TAPLaplacianSpMVTest ${MPIRUN} -n 4 ${HOST} ./test_tap_spmv_laplacian) - add_test(TAPLaplacianSpMVTest ${MPIRUN} -n 8 ${HOST} ./test_tap_spmv_laplacian) - add_test(TAPLaplacianSpMVTest ${MPIRUN} -n 16 ${HOST} ./test_tap_spmv_laplacian) - - add_executable(test_tap_spmv_aniso test_tap_spmv_aniso.cpp) - target_link_libraries(test_tap_spmv_aniso raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(TAPAnisoSpMVTest ${MPIRUN} -n 2 ${HOST} ./test_tap_spmv_aniso) - add_test(TAPAnisoSpMVTest ${MPIRUN} -n 4 ${HOST} ./test_tap_spmv_aniso) - add_test(TAPAnisoSpMVTest ${MPIRUN} -n 8 ${HOST} ./test_tap_spmv_aniso) - add_test(TAPAnisoSpMVTest ${MPIRUN} -n 16 ${HOST} 
./test_tap_spmv_aniso) - - add_executable(test_tap_spmv_random test_tap_spmv_random.cpp) - target_link_libraries(test_tap_spmv_random ${MPI_LIBRARIES} raptor googletest pthread ) - add_test(TAPRandomSpMVTest ${MPIRUN} -n 2 ${HOST} ./test_tap_spmv_random) - add_test(TAPRandomSpMVTest ${MPIRUN} -n 4 ${HOST} ./test_tap_spmv_random) - add_test(TAPRandomSpMVTest ${MPIRUN} -n 8 ${HOST} ./test_tap_spmv_random) - - add_executable(test_par_scale_aniso test_par_scale_aniso.cpp) - target_link_libraries(test_par_scale_aniso raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(ParScaleAnisoTest ${MPIRUN} -n 1 ${HOST} ./test_par_scale_aniso) - add_test(ParScaleAnisoTest ${MPIRUN} -n 2 ${HOST} ./test_par_scale_aniso) - add_test(ParScaleAnisoTest ${MPIRUN} -n 3 ${HOST} ./test_par_scale_aniso) - add_test(ParScaleAnisoTest ${MPIRUN} -n 6 ${HOST} ./test_par_scale_aniso) - - add_executable(test_repartition test_repartition.cpp) - target_link_libraries(test_repartition raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(RepartitionTest ${MPIRUN} -n 1 ${HOST} ./test_repartition) - add_test(RepartitionTest ${MPIRUN} -n 2 ${HOST} ./test_repartition) - add_test(RepartitionTest ${MPIRUN} -n 3 ${HOST} ./test_repartition) - add_test(RepartitionTest ${MPIRUN} -n 6 ${HOST} ./test_repartition) - add_test(RepartitionTest ${MPIRUN} -n 16 ${HOST} ./test_repartition) - - if (WITH_PTSCOTCH) - add_executable(test_ptscotch test_ptscotch.cpp) - target_link_libraries(test_ptscotch raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(PTScotchTest ${MPIRUN} -n 1 ${HOST} ./test_ptscotch) - add_test(PTScotchTest ${MPIRUN} -n 2 ${HOST} ./test_ptscotch) - add_test(PTScotchTest ${MPIRUN} -n 3 ${HOST} ./test_ptscotch) - add_test(PTScotchTest ${MPIRUN} -n 6 ${HOST} ./test_ptscotch) - endif() - - if (WITH_PARMETIS) - add_executable(test_parmetis test_parmetis.cpp) - target_link_libraries(test_parmetis raptor ${MPI_LIBRARIES} googletest pthread ) - add_test(ParMetisTest ${MPIRUN} -n 1 ${HOST} 
./test_parmetis) - add_test(ParMetisTest ${MPIRUN} -n 2 ${HOST} ./test_parmetis) - add_test(ParMetisTest ${MPIRUN} -n 3 ${HOST} ./test_parmetis) - add_test(ParMetisTest ${MPIRUN} -n 6 ${HOST} ./test_parmetis) - endif() -endif() diff --git a/raptor/util/tests/README.md b/raptor/util/tests/README.md deleted file mode 100644 index 038d718d..00000000 --- a/raptor/util/tests/README.md +++ /dev/null @@ -1 +0,0 @@ -testing