From f430d73085a77dc5a23b89a7d25d9e6aa56d784e Mon Sep 17 00:00:00 2001 From: Denghui Lu Date: Fri, 12 Jan 2024 15:14:24 +0800 Subject: [PATCH 1/3] Feature: Add CUDA support for stress_mgga function (#3474) * init CPU implementation * init gpu implementation * fix cpu double error * fix single precision error * init gtest * init test compilation * finish work * fix Makefile error * fix UT error * fix UT error --- source/CMakeLists.txt | 2 + source/Makefile.Objects | 2 + .../module_container/ATen/core/tensor.h | 34 ++++++ .../module_container/ATen/core/tensor_types.h | 14 +++ .../ATen/kernels/test/blas_test.cpp | 8 +- .../ATen/kernels/test/lapack_test.cpp | 12 +- .../ATen/kernels/test/linalg_test.cpp | 4 +- .../ATen/kernels/test/memory_test.cpp | 4 +- .../ATen/ops/test/einsum_op_test.cpp | 13 ++- .../ATen/ops/test/linalg_op_test.cpp | 4 +- .../module_container/base/utils/gtest.h | 57 ++++++++++ .../module_container/test/test_utils.h | 57 ---------- source/module_basis/module_pw/fft.cpp | 42 ++----- .../module_surchem/test/CMakeLists.txt | 4 +- .../module_xc/CMakeLists.txt | 1 + .../kernels/cuda/xc_functional_op.cu | 79 +++++++++++++ .../module_xc/kernels/test/CMakeLists.txt | 5 + .../kernels/test/xc_functional_op_test.cpp | 81 +++++++++++++ .../module_xc/kernels/xc_functional_op.cpp | 49 ++++++++ .../module_xc/kernels/xc_functional_op.h | 32 ++++++ .../module_xc/test/CMakeLists.txt | 35 ++++-- .../module_xc/test/test_xc3.cpp | 17 ++- .../module_xc/test/test_xc5.cpp | 10 -- .../module_xc/test/xc3_mock.h | 107 ++++++++++++++++-- .../module_xc/xc_functional.h | 16 ++- .../module_xc/xc_functional_gradcorr.cpp | 69 ++++++----- .../hamilt_pwdft/kernels/cuda/stress_op.cu | 44 ++++++- .../hamilt_pwdft/kernels/stress_op.cpp | 27 +++++ .../hamilt_pwdft/kernels/stress_op.h | 13 +++ .../hamilt_pwdft/kernels/test/CMakeLists.txt | 5 +- .../kernels/test/stress_op_mgga_test.cpp | 45 ++++++++ .../hamilt_pwdft/stress_func.h | 2 +- .../hamilt_pwdft/stress_func_mgga.cpp | 72 ++++-------- .../hamilt_pwdft/stress_pw.cpp | 2 +- 34 files changed, 740 insertions(+), 228 deletions(-) create mode 100644 source/module_base/module_container/base/utils/gtest.h delete mode 100644 source/module_base/module_container/test/test_utils.h create mode 100644 source/module_hamilt_general/module_xc/kernels/cuda/xc_functional_op.cu create mode 100644 source/module_hamilt_general/module_xc/kernels/test/CMakeLists.txt create mode 100644 source/module_hamilt_general/module_xc/kernels/test/xc_functional_op_test.cpp create mode 100644 source/module_hamilt_general/module_xc/kernels/xc_functional_op.cpp create mode 100644 source/module_hamilt_general/module_xc/kernels/xc_functional_op.h create mode 100644 source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_mgga_test.cpp diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index 3e866b8323..de64277d1c 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -39,6 +39,7 @@ list(APPEND device_srcs module_hamilt_pw/hamilt_pwdft/kernels/wf_op.cpp module_hamilt_pw/hamilt_pwdft/kernels/vnl_op.cpp module_base/kernels/math_op.cpp + module_hamilt_general/module_xc/kernels/xc_functional_op.cpp ) if(USE_CUDA) @@ -57,6 +58,7 @@ if(USE_CUDA) module_hamilt_pw/hamilt_pwdft/kernels/cuda/wf_op.cu module_hamilt_pw/hamilt_pwdft/kernels/cuda/vnl_op.cu module_base/kernels/cuda/math_op.cu + module_hamilt_general/module_xc/kernels/cuda/xc_functional_op.cu ) endif() diff --git a/source/Makefile.Objects b/source/Makefile.Objects index f0e4c1331c..a0f0065a10 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -41,6 +41,7 @@ VPATH=./src_global:\ ./module_hamilt_general/module_ewald:\ ./module_hamilt_general/module_surchem:\ ./module_hamilt_general/module_xc:\ +./module_hamilt_general/module_xc/kernels:\ ./module_hamilt_pw/hamilt_pwdft:\ ./module_hamilt_pw/hamilt_ofdft:\ ./module_hamilt_pw/hamilt_stodft:\ @@ -392,6 +393,7 @@ OBJS_SYMMETRY=symm_other.o\ symmetry.o\ OBJS_XC=xc_functional.o\ + xc_functional_op.o\ xc_functional_vxc.o\ xc_functional_gradcorr.o\ xc_functional_wrapper_xc.o\ diff --git a/source/module_base/module_container/ATen/core/tensor.h b/source/module_base/module_container/ATen/core/tensor.h index 826c7e19c0..3faf820dbd 100644 --- a/source/module_base/module_container/ATen/core/tensor.h +++ b/source/module_base/module_container/ATen/core/tensor.h @@ -244,6 +244,40 @@ class Tensor { return output; } + /** + * @brief Copies data from a given device to the current tensor object. + * + * This function is designed to copy a given number of elements from a device-specific memory location + * to the memory associated with this object. It ensures that the size of the data being copied does not exceed + * the size of the destination tensor. + * + * @tparam DEVICE The device type from which the data will be copied. + * @tparam T The data type of the elements being copied. + * + * @param data Pointer to the data array in the device memory that needs to be copied. + * @param num_elements The number of elements to copy. + * + * @pre The number of elements to copy (`num_elements`) must be less than or equal to the number of elements + * in the destination tensor (`this->shape_.num_elements()`). If this condition is not met, the function + * will trigger an error through `REQUIRES_OK`. + * + * @note The function uses a template specialization `TEMPLATE_CZ_2` to handle the copying of memory + * based on the data type `T` and the device type `DEVICE`. It utilizes the `kernels::cast_memory` + * method to perform the actual memory copy operation. + */ + template + void copy_from_device(const T* data, int64_t num_elements = -1) { + if (num_elements == -1) { + num_elements = this->NumElements(); + } + REQUIRES_OK(this->shape_.NumElements() >= num_elements, + "The number of elements of the input data must match the number of elements of the tensor.") + + TEMPLATE_CZ_2(this->data_type_, this->device_, + kernels::cast_memory()( + this->data(), data, num_elements)) + } + /** * @brief Method to transform data from a given tensor object to the output tensor with a given data type * diff --git a/source/module_base/module_container/ATen/core/tensor_types.h b/source/module_base/module_container/ATen/core/tensor_types.h index 0722c4e53b..254ad22d3e 100644 --- a/source/module_base/module_container/ATen/core/tensor_types.h +++ b/source/module_base/module_container/ATen/core/tensor_types.h @@ -124,6 +124,20 @@ struct PsiToContainer { using type = container::DEVICE_GPU; /**< The return type specialization for std::complex. */ }; +template +struct ContainerToPsi { + using type = T; /**< The return type based on the input type. */ +}; + +template <> +struct ContainerToPsi { + using type = psi::DEVICE_CPU; /**< The return type specialization for std::complex. */ +}; + +template <> +struct ContainerToPsi { + using type = psi::DEVICE_GPU; /**< The return type specialization for std::complex. */ +}; /** diff --git a/source/module_base/module_container/ATen/kernels/test/blas_test.cpp b/source/module_base/module_container/ATen/kernels/test/blas_test.cpp index f5ff285473..a01ef8beb9 100644 --- a/source/module_base/module_container/ATen/kernels/test/blas_test.cpp +++ b/source/module_base/module_container/ATen/kernels/test/blas_test.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include namespace container { namespace kernels { @@ -11,14 +11,14 @@ template class BlasTest : public testing::Test { public: BlasTest() { - test_utils::init_blas_handle(); + base::utils::init_blas_handle(); } ~BlasTest() override { - test_utils::delete_blas_handle(); + base::utils::delete_blas_handle(); } }; -TYPED_TEST_SUITE(BlasTest, test_utils::Types); +TYPED_TEST_SUITE(BlasTest, base::utils::Types); TYPED_TEST(BlasTest, Dot) { using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; diff --git a/source/module_base/module_container/ATen/kernels/test/lapack_test.cpp b/source/module_base/module_container/ATen/kernels/test/lapack_test.cpp index 7e340d510a..f9998c018c 100644 --- a/source/module_base/module_container/ATen/kernels/test/lapack_test.cpp +++ b/source/module_base/module_container/ATen/kernels/test/lapack_test.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include namespace container { namespace kernels { @@ -11,16 +11,16 @@ template class LapackTest : public testing::Test { public: LapackTest() { - test_utils::init_blas_handle(); - test_utils::init_cusolver_handle(); + base::utils::init_blas_handle(); + base::utils::init_cusolver_handle(); } ~LapackTest() override { - test_utils::delete_blas_handle(); - test_utils::delete_cusolver_handle(); + base::utils::delete_blas_handle(); + base::utils::delete_cusolver_handle(); } }; -TYPED_TEST_SUITE(LapackTest, test_utils::Types); +TYPED_TEST_SUITE(LapackTest, base::utils::Types); TYPED_TEST(LapackTest, Trtri) { using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; diff --git a/source/module_base/module_container/ATen/kernels/test/linalg_test.cpp b/source/module_base/module_container/ATen/kernels/test/linalg_test.cpp index 31f6bd21eb..8b0afe634f 100644 --- a/source/module_base/module_container/ATen/kernels/test/linalg_test.cpp +++ b/source/module_base/module_container/ATen/kernels/test/linalg_test.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include namespace container { namespace kernels { @@ -15,7 +15,7 @@ class LinalgTest : public testing::Test { ~LinalgTest() override = default; }; -TYPED_TEST_SUITE(LinalgTest, test_utils::Types); +TYPED_TEST_SUITE(LinalgTest, base::utils::Types); TYPED_TEST(LinalgTest, Add) { using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; diff --git a/source/module_base/module_container/ATen/kernels/test/memory_test.cpp b/source/module_base/module_container/ATen/kernels/test/memory_test.cpp index b5bdc7eccf..e8943ae610 100644 --- a/source/module_base/module_container/ATen/kernels/test/memory_test.cpp +++ b/source/module_base/module_container/ATen/kernels/test/memory_test.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace container { namespace kernels { @@ -15,7 +15,7 @@ class MemoryTest : public testing::Test { ~MemoryTest() override = default; }; -TYPED_TEST_SUITE(MemoryTest, test_utils::Types); +TYPED_TEST_SUITE(MemoryTest, base::utils::Types); TYPED_TEST(MemoryTest, ResizeAndSynchronizeMemory) { using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; diff --git a/source/module_base/module_container/ATen/ops/test/einsum_op_test.cpp b/source/module_base/module_container/ATen/ops/test/einsum_op_test.cpp index ce4381e789..efec0072f0 100644 --- a/source/module_base/module_container/ATen/ops/test/einsum_op_test.cpp +++ b/source/module_base/module_container/ATen/ops/test/einsum_op_test.cpp @@ -2,7 +2,8 @@ #include #include -#include +#include + namespace container { namespace op { @@ -11,16 +12,16 @@ template class EinsumOpTest : public testing::Test { public: EinsumOpTest() { - test_utils::init_blas_handle(); - test_utils::init_cusolver_handle(); + base::utils::init_blas_handle(); + base::utils::init_cusolver_handle(); } ~EinsumOpTest() override { - test_utils::delete_blas_handle(); - test_utils::delete_cusolver_handle(); + base::utils::delete_blas_handle(); + base::utils::delete_cusolver_handle(); } }; -TYPED_TEST_SUITE(EinsumOpTest, test_utils::Types); +TYPED_TEST_SUITE(EinsumOpTest, base::utils::Types); TYPED_TEST(EinsumOpTest, Transform) { using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; diff --git a/source/module_base/module_container/ATen/ops/test/linalg_op_test.cpp b/source/module_base/module_container/ATen/ops/test/linalg_op_test.cpp index 8f0d34491c..610daec816 100644 --- a/source/module_base/module_container/ATen/ops/test/linalg_op_test.cpp +++ b/source/module_base/module_container/ATen/ops/test/linalg_op_test.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include namespace container { namespace op { @@ -14,7 +14,7 @@ class LinalgOpTest : public testing::Test { ~LinalgOpTest() override = default; }; -TYPED_TEST_SUITE(LinalgOpTest, test_utils::Types); +TYPED_TEST_SUITE(LinalgOpTest, base::utils::Types); TYPED_TEST(LinalgOpTest, Add) { using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; diff --git a/source/module_base/module_container/base/utils/gtest.h b/source/module_base/module_container/base/utils/gtest.h new file mode 100644 index 0000000000..4ec7ecb118 --- /dev/null +++ b/source/module_base/module_container/base/utils/gtest.h @@ -0,0 +1,57 @@ +#ifndef BASE_UTILS_GTEST_H_ +#define BASE_UTILS_GTEST_H_ +#include +#include +#include + +namespace base { +namespace utils { + +#if __CUDA || __ROCM +using ComplexTypes = ::testing::Types< + std::tuple, ct::DEVICE_CPU>, std::tuple, ct::DEVICE_GPU>, + std::tuple, ct::DEVICE_CPU>, std::tuple, ct::DEVICE_GPU>>; +using Types = ::testing::Types< + std::tuple, std::tuple, + std::tuple, std::tuple, + std::tuple, ct::DEVICE_CPU>, std::tuple, ct::DEVICE_GPU>, + std::tuple, ct::DEVICE_CPU>, std::tuple, ct::DEVICE_GPU>>; +#else +using ComplexTypes = ::testing::Types< + std::tuple, ct::DEVICE_CPU>, + std::tuple, ct::DEVICE_CPU>>; +using Types = ::testing::Types< + std::tuple, + std::tuple, + std::tuple, ct::DEVICE_CPU>, + std::tuple, ct::DEVICE_CPU>>; +#endif + +static inline void init_blas_handle() { + #if __CUDA || __ROCM + ct::kernels::createGpuBlasHandle(); + #endif +} + +static inline void delete_blas_handle() { + #if __CUDA || __ROCM + ct::kernels::destroyGpuBlasHandle(); + #endif +} + +static inline void init_cusolver_handle() { + #if __CUDA || __ROCM + ct::kernels::createGpuSolverHandle(); + #endif +} + +static inline void delete_cusolver_handle() { + #if __CUDA || __ROCM + ct::kernels::destroyGpuSolverHandle(); + #endif +} + +} // namespace utils +} // namespace base + +#endif // BASE_UTILS_GTEST_H_ \ No newline at end of file diff --git a/source/module_base/module_container/test/test_utils.h b/source/module_base/module_container/test/test_utils.h deleted file mode 100644 index d50120c34a..0000000000 --- a/source/module_base/module_container/test/test_utils.h +++ /dev/null @@ -1,57 +0,0 @@ -#ifndef ATEN_KERNELS_TEST_OP_TEST_UTILS_H_ -#define ATEN_KERNELS_TEST_OP_TEST_UTILS_H_ -#include -#include "module_base/module_container/ATen/kernels/blas.h" -#include "module_base/module_container/ATen/kernels/lapack.h" - -namespace container { -namespace test_utils { - -# if __CUDA || __ROCM -using ComplexTypes = ::testing::Types< - std::tuple, DEVICE_CPU>, std::tuple, DEVICE_GPU>, - std::tuple, DEVICE_CPU>, std::tuple, DEVICE_GPU>>; -using Types = ::testing::Types< - std::tuple, std::tuple, - std::tuple, std::tuple, - std::tuple, DEVICE_CPU>, std::tuple, DEVICE_GPU>, - std::tuple, DEVICE_CPU>, std::tuple, DEVICE_GPU>>; -#else -using ComplexTypes = ::testing::Types< - std::tuple, DEVICE_CPU>, - std::tuple, DEVICE_CPU>>; -using Types = ::testing::Types< - std::tuple, - std::tuple, - std::tuple, DEVICE_CPU>, - std::tuple, DEVICE_CPU>>; -#endif - -static inline void init_blas_handle() { - #if __CUDA || __ROCM - kernels::createGpuBlasHandle(); - #endif -} - -static inline void delete_blas_handle() { - #if __CUDA || __ROCM - kernels::destroyGpuBlasHandle(); - #endif -} - -static inline void init_cusolver_handle() { - #if __CUDA || __ROCM - kernels::createGpuSolverHandle(); - #endif -} - -static inline void delete_cusolver_handle() { - #if __CUDA || __ROCM - kernels::destroyGpuSolverHandle(); - #endif -} - -} // namespace test_utils -} // namespace container - -#endif // ATEN_KERNELS_TEST_OP_TEST_UTILS_H_ \ No newline at end of file diff --git a/source/module_basis/module_pw/fft.cpp b/source/module_basis/module_pw/fft.cpp index ce15d4a9fb..c6ea0166f7 100644 --- a/source/module_basis/module_pw/fft.cpp +++ b/source/module_basis/module_pw/fft.cpp @@ -22,17 +22,13 @@ void FFT::clear() d_rspace = nullptr; #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") { - if (this->precision == "single") { - if (c_auxr_3d != nullptr) { - delmem_cd_op()(gpu_ctx, c_auxr_3d); - c_auxr_3d = nullptr; - } + if (c_auxr_3d != nullptr) { + delmem_cd_op()(gpu_ctx, c_auxr_3d); + c_auxr_3d = nullptr; } - else { - if (z_auxr_3d != nullptr) { - delmem_zd_op()(gpu_ctx, z_auxr_3d); - z_auxr_3d = nullptr; - } + if (z_auxr_3d != nullptr) { + delmem_zd_op()(gpu_ctx, z_auxr_3d); + z_auxr_3d = nullptr; } } #endif // defined(__CUDA) || defined(__ROCM) @@ -86,12 +82,8 @@ void FFT:: initfft(int nx_in, int ny_in, int nz_in, int lixy_in, int rixy_in, in // fftw_malloc(sizeof(fftw_complex) * (this->nx * this->ny * this->nz))); #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") { - if (this->precision == "single") { - resmem_cd_op()(gpu_ctx, this->c_auxr_3d, this->nx * this->ny * this->nz); - } - else { - resmem_zd_op()(gpu_ctx, this->z_auxr_3d, this->nx * this->ny * this->nz); - } + resmem_cd_op()(gpu_ctx, this->c_auxr_3d, this->nx * this->ny * this->nz); + resmem_zd_op()(gpu_ctx, this->z_auxr_3d, this->nx * this->ny * this->nz); } #endif // defined(__CUDA) || defined(__ROCM) #if defined(__ENABLE_FLOAT_FFTW) @@ -242,20 +234,13 @@ void FFT :: initplan(const unsigned int& flag) #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") { - if (this->precision == "single") { #if defined(__CUDA) cufftPlan3d(&c_handle, this->nx, this->ny, this->nz, CUFFT_C2C); - #elif defined(__ROCM) - hipfftPlan3d(&c_handle, this->nx, this->ny, this->nz, HIPFFT_C2C); - #endif - } - else { - #if defined(__CUDA) cufftPlan3d(&z_handle, this->nx, this->ny, this->nz, CUFFT_Z2Z); #elif defined(__ROCM) + hipfftPlan3d(&c_handle, this->nx, this->ny, this->nz, HIPFFT_C2C); hipfftPlan3d(&z_handle, this->nx, this->ny, this->nz, HIPFFT_Z2Z); #endif - } } #endif @@ -363,20 +348,13 @@ void FFT:: cleanFFT() // fftw_destroy_plan(this->plan3dbackward); #if defined(__CUDA) || defined(__ROCM) if (this->device == "gpu") { - if (this->precision == "single") { #if defined(__CUDA) if(c_handle) { cufftDestroy(c_handle); c_handle = {};} - #elif defined(__ROCM) - if(c_handle) { hipfftDestroy(c_handle); c_handle = {};} - #endif - } - else { - #if defined(__CUDA) if(z_handle) { cufftDestroy(z_handle); z_handle = {};} #elif defined(__ROCM) + if(c_handle) { hipfftDestroy(c_handle); c_handle = {};} if(z_handle) { hipfftDestroy(z_handle); z_handle = {};} #endif - } } #endif } diff --git a/source/module_hamilt_general/module_surchem/test/CMakeLists.txt b/source/module_hamilt_general/module_surchem/test/CMakeLists.txt index 91ead3f567..1cc52484e7 100644 --- a/source/module_hamilt_general/module_surchem/test/CMakeLists.txt +++ b/source/module_hamilt_general/module_surchem/test/CMakeLists.txt @@ -28,7 +28,7 @@ AddTest( AddTest( TARGET surchem_cal_vcav - LIBS ${math_libs} planewave device base + LIBS ${math_libs} planewave device base container SOURCES cal_vcav_test.cpp ../cal_vcav.cpp ../surchem.cpp ../../../module_hamilt_pw/hamilt_pwdft/parallel_grid.cpp ../../module_xc/xc_functional_gradcorr.cpp ../../module_xc/xc_functional.cpp ../../module_xc/xc_functional_wrapper_xc.cpp ../../module_xc/xc_functional_wrapper_gcxc.cpp @@ -39,7 +39,7 @@ AddTest( AddTest( TARGET surchem_cal_vel - LIBS ${math_libs} planewave device base + LIBS ${math_libs} planewave device base container SOURCES cal_vel_test.cpp ../cal_vel.cpp ../surchem.cpp ../cal_epsilon.cpp ../minimize_cg.cpp ../../../module_hamilt_pw/hamilt_pwdft/parallel_grid.cpp ../../module_xc/xc_functional_gradcorr.cpp ../../module_xc/xc_functional.cpp ../../module_xc/xc_functional_wrapper_xc.cpp ../../module_xc/xc_functional_wrapper_gcxc.cpp diff --git a/source/module_hamilt_general/module_xc/CMakeLists.txt b/source/module_hamilt_general/module_xc/CMakeLists.txt index 5c72f608fd..087a2d22ff 100644 --- a/source/module_hamilt_general/module_xc/CMakeLists.txt +++ b/source/module_hamilt_general/module_xc/CMakeLists.txt @@ -21,5 +21,6 @@ endif() if(BUILD_TESTING) if(ENABLE_LIBXC) add_subdirectory(test) + add_subdirectory(kernels/test) endif() endif() diff --git a/source/module_hamilt_general/module_xc/kernels/cuda/xc_functional_op.cu b/source/module_hamilt_general/module_xc/kernels/cuda/xc_functional_op.cu new file mode 100644 index 0000000000..43c137153d --- /dev/null +++ b/source/module_hamilt_general/module_xc/kernels/cuda/xc_functional_op.cu @@ -0,0 +1,79 @@ +#include +#include +#include + +#define THREADS_PER_BLOCK 256 + +namespace hamilt { + +template +__global__ void xc_functional_grad_wfc( + const int ik, + const int pol, + const int npw, + const int npwx, + const T tpiba, + const T* gcar, + const T* kvec_c, + const thrust::complex* rhog, + thrust::complex* porter) +{ + int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx >= npw) { return; } + // the formula is : rho(r)^prime = \int iG * rho(G)e^{iGr} dG + // double kplusg = wfc_basis->getgpluskcar(ik,ig)[ipol] * tpiba; + T kplusg = (gcar[(ik * npwx + idx) * 3 + pol] + + kvec_c[ik * 3 + pol]) * tpiba; + // calculate the charge density gradient in reciprocal space. + porter[idx] = thrust::complex(0.0, kplusg) * rhog[idx]; +} + +template +__global__ void xc_functional_grad_wfc( + const int ipol, + const int nrxx, + const T* porter, + T* grad) +{ + int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx >= nrxx) { return; } + grad[ipol * nrxx + idx] = porter[idx]; +} + +template +void xc_functional_grad_wfc_op::operator()( + const int& ik, + const int& pol, + const int& npw, + const int& npwx, + const Real& tpiba, + const Real * gcar, + const Real * kvec_c, + const T * rhog, + T* porter) +{ + auto porter_ = reinterpret_cast*>(porter); + auto rhog_ = reinterpret_cast*>(rhog); + const int block = (npw + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + xc_functional_grad_wfc<<>>( + ik, pol, npw, npwx, tpiba, gcar, kvec_c, rhog_, porter_); +} + +template +void xc_functional_grad_wfc_op::operator()( + const int& ipol, + const int& nrxx, + const T * porter, + T* grad) +{ + auto grad_ = reinterpret_cast*>(grad); + auto porter_ = reinterpret_cast*>(porter); + const int block = (nrxx + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + xc_functional_grad_wfc<<>>( + ipol, nrxx, porter_, grad_); +} + +template struct xc_functional_grad_wfc_op , psi::DEVICE_GPU>; +template struct xc_functional_grad_wfc_op, psi::DEVICE_GPU>; + +} // namespace hamilt \ No newline at end of file diff --git a/source/module_hamilt_general/module_xc/kernels/test/CMakeLists.txt b/source/module_hamilt_general/module_xc/kernels/test/CMakeLists.txt new file mode 100644 index 0000000000..abaf529ae9 --- /dev/null +++ b/source/module_hamilt_general/module_xc/kernels/test/CMakeLists.txt @@ -0,0 +1,5 @@ +AddTest( + TARGET XC_Functional_UTs + LIBS ${math_libs} device base container + SOURCES xc_functional_op_test.cpp +) diff --git a/source/module_hamilt_general/module_xc/kernels/test/xc_functional_op_test.cpp b/source/module_hamilt_general/module_xc/kernels/test/xc_functional_op_test.cpp new file mode 100644 index 0000000000..2861e0601a --- /dev/null +++ b/source/module_hamilt_general/module_xc/kernels/test/xc_functional_op_test.cpp @@ -0,0 +1,81 @@ +#include + +#include +#include + +namespace hamilt { + +template +class XC_FunctionalOpTest : public testing::Test { +public: + XC_FunctionalOpTest() = default; + + ~XC_FunctionalOpTest() override = default; +}; + +TYPED_TEST_SUITE(XC_FunctionalOpTest, base::utils::ComplexTypes); + +TYPED_TEST(XC_FunctionalOpTest, xc_functional_grad_wfc_op) { + using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; + using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; + using Real = typename GetTypeReal::type; + + const int ik = 0; + const int ipol = 0; + const int npw = 4; + const int npwx = 4; + const int nrxx = 4; + const Real tpiba = 3.3249187157734292; + + ct::Tensor gcar = std::move(ct::Tensor( + {static_cast(0), static_cast(0), static_cast(-0.46354790605367302), + static_cast(0), static_cast(0), static_cast(-0.30903193736911533), + static_cast(0), static_cast(0), static_cast(-0.15451596868455766), + static_cast(0), static_cast(0), static_cast(0)}).to_device()); + + ct::Tensor kvec_c = std::move(ct::Tensor({static_cast(0)}).to_device()); + + ct::Tensor rhog = std::move(ct::Tensor( + {static_cast(-3.0017116103913875e-05, 7.1056289872527673e-08), + static_cast(-7.5546991753311607e-05, -2.3428181621642512e-07), + static_cast(-0.00092404657246742273, 1.604946087155834e-06), + static_cast(0.98692408978015989, 1.8132214954316574e-09)}).to_device()); + + ct::Tensor expected_porter = std::move(ct::Tensor( + {static_cast(0), + static_cast(0), + static_cast(0), + static_cast(0)}).to_device()); + + auto porter = expected_porter; + + ct::Tensor porter_after = std::move(ct::Tensor( + {static_cast(1.909712182465086e-07, 3.6064630015014698e-16), + static_cast(-1.6872327080003407e-07, -3.4645680421774294e-16), + static_cast(-2.3926040075122384e-07, -5.37330596683816e-16), + static_cast(1.5465312289181674e-08, -8.7169854667834556e-17)}).to_device()); + + ct::Tensor expected_grad = porter; + auto grad = expected_grad; + grad.zero(); + + auto xc_functional_grad_wfc_solver = xc_functional_grad_wfc_op::type>(); + + xc_functional_grad_wfc_solver( + ik, ipol, npw, npwx, // Integers + tpiba, // Double + gcar.data(), // Array of Real + kvec_c.data(), // Array of double + rhog.data(), porter.data()); // Array of std::complex + + EXPECT_EQ(porter, expected_porter); + + + xc_functional_grad_wfc_solver( + ipol, nrxx, // Integers + porter_after.data(), grad.data()); // Array of std::complex + + EXPECT_EQ(grad, expected_grad); +} + +} // namespace hamilt \ No newline at end of file diff --git a/source/module_hamilt_general/module_xc/kernels/xc_functional_op.cpp b/source/module_hamilt_general/module_xc/kernels/xc_functional_op.cpp new file mode 100644 index 0000000000..d553764a42 --- /dev/null +++ b/source/module_hamilt_general/module_xc/kernels/xc_functional_op.cpp @@ -0,0 +1,49 @@ +#include + +namespace hamilt { + +template +void xc_functional_grad_wfc_op::operator()( + const int& ik, + const int& pol, + const int& npw, + const int& npwx, + const Real& tpiba, + const Real * gcar, + const Real * kvec_c, + const T * rhog, + T* porter) +{ +#ifdef _OPENMP +#pragma omp parallel for schedule(static, 1024) +#endif + for(int ig = 0; ig < npw; ig++) { + // the formula is : rho(r)^prime = \int iG * rho(G)e^{iGr} dG + // double kplusg = wfc_basis->getgpluskcar(ik,ig)[ipol] * tpiba; + Real kplusg = (gcar[(ik * npwx + ig) * 3 + pol] + + kvec_c[ik * 3 + pol]) * tpiba; + + // calculate the charge density gradient in reciprocal space. + porter[ig] = T(0.0, kplusg) * rhog[ig]; + } +} + +template +void xc_functional_grad_wfc_op::operator()( + const int& ipol, + const int& nrxx, + const T * porter, + T* grad) +{ +#ifdef _OPENMP +#pragma omp parallel for schedule(static, 1024) +#endif + for (int ir = 0; ir < nrxx; ++ir) { + grad[ipol * nrxx + ir] = porter[ir]; + } +} + +template struct xc_functional_grad_wfc_op , psi::DEVICE_CPU>; +template struct xc_functional_grad_wfc_op, psi::DEVICE_CPU>; + +} // namespace hamilt \ No newline at end of file diff --git a/source/module_hamilt_general/module_xc/kernels/xc_functional_op.h b/source/module_hamilt_general/module_xc/kernels/xc_functional_op.h new file mode 100644 index 0000000000..1c83947182 --- /dev/null +++ b/source/module_hamilt_general/module_xc/kernels/xc_functional_op.h @@ -0,0 +1,32 @@ +#ifndef MODULE_HAMILT_GENERAL_MODULE_XC_KERNELS_H_ +#define MODULE_HAMILT_GENERAL_MODULE_XC_KERNELS_H_ + +#include +#include +#include + +namespace hamilt { + +template +struct xc_functional_grad_wfc_op { + using Real = typename GetTypeReal::type; + void operator()( + const int& ik, + const int& pol, + const int& npw, + const int& npwx, + const Real& tpiba, + const Real * gcar, + const Real * kvec_c, + const T * rhog, + T* porter); + + void operator()( + const int& ipol, + const int& nrxx, + const T * porter, + T* grad); +}; + +} // namespace hamilt +#endif // MODULE_HAMILT_GENERAL_MODULE_XC_KERNELS_H_ \ No newline at end of file diff --git a/source/module_hamilt_general/module_xc/test/CMakeLists.txt b/source/module_hamilt_general/module_xc/test/CMakeLists.txt index eca867f8be..ee263a6d5c 100644 --- a/source/module_hamilt_general/module_xc/test/CMakeLists.txt +++ b/source/module_hamilt_general/module_xc/test/CMakeLists.txt @@ -254,24 +254,30 @@ AddTest( AddTest( TARGET XCTest_GRADCORR - LIBS MPI::MPI_CXX Libxc::xc ${math_libs} + LIBS MPI::MPI_CXX Libxc::xc ${math_libs} psi device container SOURCES test_xc3.cpp ../xc_functional_gradcorr.cpp ../xc_functional.cpp ../xc_functional_wrapper_xc.cpp ../xc_functional_wrapper_gcxc.cpp ../xc_functional_wrapper_tauxc.cpp ../xc_funct_corr_gga.cpp ../xc_funct_corr_lda.cpp ../xc_funct_exch_gga.cpp ../xc_funct_exch_lda.cpp ../xc_funct_hcth.cpp ../../../module_base/matrix.cpp + ../../../module_base/memory.cpp + ../../../module_base/libm/branred.cpp + ../../../module_base/libm/sincos.cpp ) AddTest( TARGET XCTest_GRADWFC - LIBS MPI::MPI_CXX Libxc::xc ${math_libs} + LIBS MPI::MPI_CXX Libxc::xc ${math_libs} psi device container SOURCES test_xc3.cpp ../xc_functional_gradcorr.cpp ../xc_functional.cpp ../xc_functional_wrapper_xc.cpp ../xc_functional_wrapper_gcxc.cpp ../xc_functional_wrapper_tauxc.cpp ../xc_funct_corr_gga.cpp ../xc_funct_corr_lda.cpp ../xc_funct_exch_gga.cpp ../xc_funct_exch_lda.cpp ../xc_funct_hcth.cpp ../../../module_base/matrix.cpp + ../../../module_base/memory.cpp + ../../../module_base/libm/branred.cpp + ../../../module_base/libm/sincos.cpp ) AddTest( @@ -285,19 +291,23 @@ AddTest( AddTest( TARGET XCTest_VXC - LIBS MPI::MPI_CXX Libxc::xc ${math_libs} formatter + LIBS MPI::MPI_CXX Libxc::xc ${math_libs} psi device container formatter SOURCES test_xc5.cpp ../xc_functional_gradcorr.cpp ../xc_functional.cpp ../xc_functional_wrapper_xc.cpp ../xc_functional_wrapper_gcxc.cpp ../xc_functional_wrapper_tauxc.cpp ../xc_funct_corr_gga.cpp ../xc_funct_corr_lda.cpp ../xc_funct_exch_gga.cpp ../xc_funct_exch_lda.cpp ../xc_funct_hcth.cpp - ../../../module_base/matrix.cpp ../../../module_base/timer.cpp ../xc_functional_vxc.cpp + ../../../module_base/matrix.cpp + ../../../module_base/memory.cpp + ../../../module_base/timer.cpp + ../../../module_base/libm/branred.cpp + ../../../module_base/libm/sincos.cpp ) AddTest( TARGET XCTest_VXC_Libxc - LIBS MPI::MPI_CXX Libxc::xc ${math_libs} formatter + LIBS MPI::MPI_CXX Libxc::xc ${math_libs} psi device container formatter SOURCES test_xc5.cpp ../xc_functional_gradcorr.cpp ../xc_functional.cpp ../xc_functional_wrapper_xc.cpp ../xc_functional_wrapper_gcxc.cpp ../xc_functional_wrapper_tauxc.cpp @@ -305,11 +315,16 @@ AddTest( ../xc_funct_exch_lda.cpp ../xc_funct_hcth.cpp ../../../module_base/matrix.cpp ../../../module_base/timer.cpp ../xc_functional_vxc.cpp + ../../../module_base/matrix.cpp + ../../../module_base/memory.cpp + ../../../module_base/timer.cpp + ../../../module_base/libm/branred.cpp + ../../../module_base/libm/sincos.cpp ) AddTest( TARGET XCTest_VXC_meta - LIBS MPI::MPI_CXX Libxc::xc ${math_libs} formatter + LIBS MPI::MPI_CXX Libxc::xc ${math_libs} psi device container formatter SOURCES test_xc5.cpp ../xc_functional_gradcorr.cpp ../xc_functional.cpp ../xc_functional_wrapper_xc.cpp ../xc_functional_wrapper_gcxc.cpp ../xc_functional_wrapper_tauxc.cpp @@ -317,4 +332,10 @@ AddTest( ../xc_funct_exch_lda.cpp ../xc_funct_hcth.cpp ../../../module_base/matrix.cpp ../../../module_base/timer.cpp ../xc_functional_vxc.cpp -) \ No newline at end of file + ../../../module_base/matrix.cpp + ../../../module_base/memory.cpp + ../../../module_base/timer.cpp + ../../../module_base/libm/branred.cpp + ../../../module_base/libm/sincos.cpp +) + diff --git a/source/module_hamilt_general/module_xc/test/test_xc3.cpp b/source/module_hamilt_general/module_xc/test/test_xc3.cpp index 7b49fea0bc..601afc8863 100644 --- a/source/module_hamilt_general/module_xc/test/test_xc3.cpp +++ b/source/module_hamilt_general/module_xc/test/test_xc3.cpp @@ -147,9 +147,13 @@ class XCTest_GRADWFC : public testing::Test protected: std::complex * grad = nullptr; + ModuleBase::Vector3 * gcar_wrapper = nullptr; + ModuleBase::Vector3 * kvec_c_wrapper = nullptr; ~XCTest_GRADWFC() { - delete[] grad; + delete [] grad; + delete [] gcar_wrapper; + delete [] kvec_c_wrapper; } void SetUp() @@ -157,8 +161,17 @@ class XCTest_GRADWFC : public testing::Test ModulePW::PW_Basis_K rhopw; rhopw.npwk = new int[1]; rhopw.npwk[0] = 5; + rhopw.npwk_max = 5; rhopw.nmaxgr = 5; rhopw.nrxx = 5; + rhopw.nks = 1; + gcar_wrapper = new ModuleBase::Vector3[rhopw.npwk_max]; + for (int ii = 0; ii < rhopw.npwk_max; ii++) + gcar_wrapper[ii] = ModuleBase::Vector3(0,0,0); + kvec_c_wrapper = new ModuleBase::Vector3(1,2,3); + + rhopw.gcar = gcar_wrapper; + rhopw.kvec_c = kvec_c_wrapper; std::complex rhog[5]; for (int i=0;i<5;i++) @@ -169,7 +182,7 @@ class XCTest_GRADWFC : public testing::Test grad = new std::complex[15]; - XC_Functional::grad_wfc(rhog, 0, grad, &rhopw, tpiba); + XC_Functional::grad_wfc, psi::DEVICE_CPU>(0, tpiba, &rhopw, rhog, grad); } }; diff --git a/source/module_hamilt_general/module_xc/test/test_xc5.cpp b/source/module_hamilt_general/module_xc/test/test_xc5.cpp index ec9ac075ed..bd2f87da8a 100644 --- a/source/module_hamilt_general/module_xc/test/test_xc5.cpp +++ b/source/module_hamilt_general/module_xc/test/test_xc5.cpp @@ -15,16 +15,6 @@ // v_xc_libxc, called by v_xc, when we use functionals from LIBXC // v_xc_meta, unified interface of mGGA functionals -template<> -void Parallel_Reduce::reduce_pool(double& object) -{ -#ifdef __MPI - double swap = object; - MPI_Allreduce(&swap , &object , 1, MPI_DOUBLE , MPI_SUM , MPI_COMM_WORLD); -#endif - return; -} - class XCTest_VXC : public testing::Test { protected: diff --git a/source/module_hamilt_general/module_xc/test/xc3_mock.h b/source/module_hamilt_general/module_xc/test/xc3_mock.h index 8ff382b95a..48f5ce2bc7 100644 --- a/source/module_hamilt_general/module_xc/test/xc3_mock.h +++ b/source/module_hamilt_general/module_xc/test/xc3_mock.h @@ -1,6 +1,6 @@ // I'm mocking FFT here because it is not possible to write // unit tests with FFT - +#include namespace ModulePW { PW_Basis::PW_Basis(){}; @@ -63,17 +63,73 @@ namespace ModulePW out[i] = -ModuleBase::IMAG_UNIT * in[i]; } } - template void PW_Basis_K::recip2real(const std::complex* in, - std::complex* out, - const int ik, - const bool add, - const double factor) const; + template void PW_Basis_K::recip2real(const std::complex* in, + std::complex* out, + const int ik, + const bool add, + const double factor) const; - ModuleBase::Vector3 PW_Basis_K::getgpluskcar(int, int) const - { + ModuleBase::Vector3 PW_Basis_K::getgpluskcar(int, int) const + { ModuleBase::Vector3 x = {1,2,3}; return x; - }; + } + + + template + void PW_Basis_K::real_to_recip(const Device* ctx, + const std::complex* in, + std::complex* out, + const int ik, + const bool add, + const FPTYPE factor) const // in:(nplane,nx*ny) ; out(nz, ns) + { + for (int i=0;i + void PW_Basis_K::recip_to_real(const Device* ctx, + const std::complex* in, + std::complex* out, + const int ik, + const bool add, + const FPTYPE factor) const + { + for (int i = 0; i < nrxx; i++) + { + out[i] = -ModuleBase::IMAG_UNIT * in[i]; + } + } + + template void PW_Basis_K::real_to_recip(const psi::DEVICE_CPU* ctx, + const std::complex* in, + std::complex* out, + const int ik, + const bool add, + const double factor) const; + template void PW_Basis_K::recip_to_real(const psi::DEVICE_CPU* ctx, + const std::complex* in, + std::complex* out, + const int ik, + const bool add, + const double factor) const; +#if __CUDA || __ROCM + template void PW_Basis_K::real_to_recip(const psi::DEVICE_GPU* ctx, + const std::complex* in, + std::complex* out, + const int ik, + const bool add, + const double factor) const; + + template void PW_Basis_K::recip_to_real(const psi::DEVICE_GPU* ctx, + const std::complex* in, + std::complex* out, + const int ik, + const bool add, + const double factor) const; +#endif FFT::FFT(){}; FFT::~FFT(){}; @@ -89,6 +145,7 @@ namespace ModulePW namespace ModuleBase { void WARNING_QUIT(const std::string &file,const std::string &description) {return ;} + void WARNING(const std::string &file,const std::string &description) {}; void Matrix3::Identity(){}; @@ -97,6 +154,7 @@ namespace ModuleBase void TITLE(const std::string &class_function_name,bool disable){}; void TITLE(const std::string &class_name,const std::string &function_name,bool disable){}; + } namespace GlobalV @@ -105,9 +163,12 @@ namespace GlobalV bool CAL_STRESS = 0; int CAL_FORCE = 0; int NSPIN; + int NPOL; double XC_TEMPERATURE; bool DOMAG; bool DOMAG_Z; + std::ofstream ofs_device; + std::ofstream ofs_running; } namespace GlobalC @@ -139,3 +200,31 @@ void UnitCell::cal_ux() InfoNonlocal::InfoNonlocal(){}; InfoNonlocal::~InfoNonlocal(){}; #endif + +namespace Parallel_Reduce +{ + /// reduce in all process + template + void reduce_all(T& object){}; + template + void reduce_all(T* object, const int n){}; + template + void reduce_pool(T& object){}; + template + void reduce_pool(T* object, const int n){}; + + template<> + void Parallel_Reduce::reduce_pool(double& object) + { + #ifdef __MPI + double swap = object; + MPI_Allreduce(&swap , &object , 1, MPI_DOUBLE , MPI_SUM , MPI_COMM_WORLD); + #endif + return; + } + template void reduce_all(double& object); + template void reduce_all(double* object, const int n); + template void reduce_pool(float& object); + template void reduce_pool(float* object, const int n); + template void reduce_pool(double* object, const int n); +} \ No newline at end of file diff --git a/source/module_hamilt_general/module_xc/xc_functional.h b/source/module_hamilt_general/module_xc/xc_functional.h index b32352ccaa..71ce3785cb 100644 --- a/source/module_hamilt_general/module_xc/xc_functional.h +++ b/source/module_hamilt_general/module_xc/xc_functional.h @@ -10,7 +10,7 @@ #else #include "xc_funcs.h" #endif // ifdef USE_LIBXC - +#include "module_base/macros.h" #include "module_base/global_function.h" #include "module_base/global_variable.h" #include "module_base/vector3.h" @@ -19,6 +19,7 @@ #include "module_basis/module_pw/pw_basis_k.h" #include "module_elecstate/module_charge/charge.h" #include "module_cell/unitcell.h" + class XC_Functional { public: @@ -209,11 +210,14 @@ class XC_Functional const UnitCell* ucell, std::vector& stress_gga, const bool is_stress = 0); - static void grad_wfc(const std::complex* rhog, - const int ik, - std::complex* grad, - const ModulePW::PW_Basis_K* wfc_basis, - const double tpiba); + template ::type> + static void grad_wfc( + const int ik, + const Real tpiba, + const ModulePW::PW_Basis_K* wfc_basis, + const T* rhog, + T* grad); static void grad_rho(const std::complex* rhog, ModuleBase::Vector3* gdr, const ModulePW::PW_Basis* rho_basis, diff --git a/source/module_hamilt_general/module_xc/xc_functional_gradcorr.cpp b/source/module_hamilt_general/module_xc/xc_functional_gradcorr.cpp index 8fba1870c6..75acbbcfe4 100644 --- a/source/module_hamilt_general/module_xc/xc_functional_gradcorr.cpp +++ b/source/module_hamilt_general/module_xc/xc_functional_gradcorr.cpp @@ -9,8 +9,14 @@ // and gives the spin up and spin down components of the charge. #include "xc_functional.h" +#include "module_base/timer.h" #include "module_basis/module_pw/pw_basis_k.h" +#include +#include +#include +#include + // from gradcorr.f90 void XC_Functional::gradcorr(double &etxc, double &vtxc, ModuleBase::matrix &v, const Charge* const chr, ModulePW::PW_Basis* rhopw, const UnitCell *ucell, @@ -584,41 +590,46 @@ void XC_Functional::gradcorr(double &etxc, double &vtxc, ModuleBase::matrix &v, return; } -void XC_Functional::grad_wfc(const std::complex* rhog, - const int ik, - std::complex* grad, - const ModulePW::PW_Basis_K* wfc_basis, - const double tpiba) +template +void XC_Functional::grad_wfc( + const int ik, + const Real tpiba, + const ModulePW::PW_Basis_K* wfc_basis, + const T* rhog, + T* grad) { + using ct_Device = typename ct::PsiToContainer::type; const int npw_k = wfc_basis->npwk[ik]; - std::complex *Porter = new std::complex [wfc_basis->nmaxgr]; + + auto porter = std::move(ct::Tensor( + ct::DataTypeToEnum::value, ct::DeviceTypeToEnum::value, {wfc_basis->nmaxgr})); + auto gcar = ct::TensorMap( + &wfc_basis->gcar[0][0], ct::DataType::DT_DOUBLE, ct::DeviceType::CpuDevice, {wfc_basis->nks * wfc_basis->npwk_max, 3}).to_device(); + auto kvec_c = ct::TensorMap( + &wfc_basis->kvec_c[0][0],ct::DataType::DT_DOUBLE, ct::DeviceType::CpuDevice, {wfc_basis->nks, 3}).to_device(); + + auto xc_functional_grad_wfc_solver + = hamilt::xc_functional_grad_wfc_op(); - for(int ipol=0; ipol<3; ipol++) - { -#ifdef _OPENMP -#pragma omp parallel for schedule(static, 1024) -#endif - for(int ig=0; iggetgpluskcar(ik,ig)[ipol] * tpiba; - // calculate the charge density gradient in reciprocal space. - Porter[ig] = std::complex(0.0,kplusg) * rhog[ig]; - } + for(int ipol=0; ipol<3; ipol++) { + xc_functional_grad_wfc_solver( + ik, ipol, npw_k, wfc_basis->npwk_max, // Integers + tpiba, // Double + gcar.template data(), // Array of Real + kvec_c.template data(), // Array of double + rhog, porter.data()); // Array of std::complex // bring the gdr from G --> R - wfc_basis->recip2real(Porter, Porter, ik); + Device * ctx = nullptr; + wfc_basis->recip_to_real(ctx, porter.data(), porter.data(), ik); -#ifdef _OPENMP -#pragma omp parallel for schedule(static, 1024) -#endif - for (int ir = 0; ir < wfc_basis->nrxx; ++ir) - grad[ipol * wfc_basis->nrxx + ir] = Porter[ir]; - }//end loop ipol - delete[] Porter; - return; + xc_functional_grad_wfc_solver( + ipol, wfc_basis->nrxx, // Integers + porter.data(), grad); // Array of std::complex + } } + void XC_Functional::grad_rho(const std::complex* rhog, ModuleBase::Vector3* gdr, const ModulePW::PW_Basis* rho_basis, @@ -736,3 +747,7 @@ void XC_Functional::noncolin_rho(double *rhoout1, double *rhoout2, double *neg, return; } +template void XC_Functional::grad_wfc, psi::DEVICE_CPU, double>(const int ik, const double tpiba, const ModulePW::PW_Basis_K* wfc_basis, const std::complex* rhog, std::complex* grad); +#if __CUDA || __ROCM +template void XC_Functional::grad_wfc, psi::DEVICE_GPU, double>(const int ik, const double tpiba, const ModulePW::PW_Basis_K* wfc_basis, const std::complex* rhog, std::complex* grad); +#endif // __CUDA || __ROCM \ No newline at end of file diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/cuda/stress_op.cu b/source/module_hamilt_pw/hamilt_pwdft/kernels/cuda/stress_op.cu index 43fbabe0b2..505e2ce6ee 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/cuda/stress_op.cu +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/cuda/stress_op.cu @@ -3,6 +3,7 @@ #include #include +#include #include @@ -21,6 +22,28 @@ void warp_reduce(FPTYPE & val) { } } +template +__global__ void cal_stress_mgga( + const int spin, + const int nrxx, + const T w1, + const thrust::complex * gradwfc, + T * crosstaus) +{ + int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx >= nrxx) { return; } + int ipol = 0; + for (int ix = 0; ix < 3; ix++) { + for (int iy = 0; iy < ix + 1; iy++) { + crosstaus[spin * nrxx * 6 + ipol * nrxx + idx] + += 2.0 * w1 + * (gradwfc[ix * nrxx + idx].real() * gradwfc[iy*nrxx + idx].real() + + gradwfc[ix * nrxx + idx].imag() * gradwfc[iy*nrxx + idx].imag()); + ipol += 1; + } + } +} + template __global__ void cal_dbecp_noevc_nl( const int ipol, @@ -213,10 +236,27 @@ void cal_stress_nl_op::operator() ( stress);// array of data } -template struct cal_dbecp_noevc_nl_op; -template struct cal_stress_nl_op; +template +void cal_stress_mgga_op::operator()( + const int& spin, + const int& nrxx, + const Real& w1, + const T * gradwfc, + Real * crosstaus) +{ + auto gradwfc_ = reinterpret_cast*>(gradwfc); + const int block = (nrxx + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + cal_stress_mgga<<>>( + spin, nrxx, w1, gradwfc_, crosstaus); +} + +template struct cal_stress_mgga_op, psi::DEVICE_GPU>; +template struct cal_stress_mgga_op, psi::DEVICE_GPU>; +template struct cal_dbecp_noevc_nl_op; template struct cal_dbecp_noevc_nl_op; + +template struct cal_stress_nl_op; template struct cal_stress_nl_op; } // namespace hamilt \ No newline at end of file diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/stress_op.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/stress_op.cpp index 872db30dba..dfa6836df2 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/stress_op.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/stress_op.cpp @@ -1,6 +1,8 @@ #include "module_hamilt_pw/hamilt_pwdft/kernels/stress_op.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" +#include + namespace hamilt{ template @@ -132,6 +134,31 @@ struct cal_stress_nl_op { } }; +template +void cal_stress_mgga_op::operator()( + const int& spin, + const int& nrxx, + const Real& w1, + const T * gradwfc, + Real * crosstaus) +{ + for (int ir = 0; ir < nrxx; ir++) { + int ipol = 0; + for (int ix = 0; ix < 3; ix++) { + for (int iy = 0; iy < ix + 1; iy++) { + crosstaus[spin * nrxx * 6 + ipol * nrxx + ir] + += 2.0 * w1 + * (gradwfc[ix*nrxx + ir].real() * gradwfc[iy*nrxx + ir].real() + + gradwfc[ix*nrxx + ir].imag() * gradwfc[iy*nrxx + ir].imag()); + ipol += 1; + } + } + } +} + +template struct cal_stress_mgga_op, psi::DEVICE_CPU>; +template struct cal_stress_mgga_op, psi::DEVICE_CPU>; + template struct cal_dbecp_noevc_nl_op; template struct cal_stress_nl_op; diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/stress_op.h b/source/module_hamilt_pw/hamilt_pwdft/kernels/stress_op.h index 931653d79a..d9a9d0f04a 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/stress_op.h +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/stress_op.h @@ -3,6 +3,8 @@ #include "module_psi/psi.h" #include +#include + namespace hamilt { @@ -101,6 +103,17 @@ namespace hamilt { FPTYPE* stress); }; +template +struct cal_stress_mgga_op { + using Real = typename GetTypeReal::type; + void operator()( + const int& spin, + const int& nrxx, + const Real& w1, + const T * gradwfc, + Real * crosstaus); +}; + #if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM template struct cal_dbecp_noevc_nl_op { diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/CMakeLists.txt b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/CMakeLists.txt index d502e5a43f..582d17b5bc 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/CMakeLists.txt +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/CMakeLists.txt @@ -1,11 +1,10 @@ remove_definitions(-D__LCAO) remove_definitions(-D__DEEPKS) -remove_definitions(-D__CUDA) -remove_definitions(-D__ROCM) AddTest( TARGET Hamilt_Kernels_UTs - LIBS ${math_libs} device base + LIBS ${math_libs} device base container SOURCES ekinetic_op_test.cpp nonlocal_op_test.cpp veff_op_test.cpp meta_op_test.cpp force_op_test.cpp stress_op_test.cpp wf_op_test.cpp vnl_op_test.cpp + stress_op_mgga_test.cpp ) diff --git a/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_mgga_test.cpp b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_mgga_test.cpp new file mode 100644 index 0000000000..d8f8a9c4c9 --- /dev/null +++ b/source/module_hamilt_pw/hamilt_pwdft/kernels/test/stress_op_mgga_test.cpp @@ -0,0 +1,45 @@ +#include + +#include +#include + +namespace hamilt { + +template +class StressMggaTest : public testing::Test { +public: + StressMggaTest() = default; + + ~StressMggaTest() override = default; +}; + +TYPED_TEST_SUITE(StressMggaTest, base::utils::ComplexTypes); + +TYPED_TEST(StressMggaTest, cal_stress_mgga_op) { + using Type = typename std::tuple_element<0, decltype(TypeParam())>::type; + using Device = typename std::tuple_element<1, decltype(TypeParam())>::type; + using Real = typename GetTypeReal::type; + + const int nrxx = 4; + const int nspin = 0; + const Real w1 = 0.00031685874122631746; + + ct::Tensor gradwfc = std::move(ct::Tensor( + {static_cast( 0.0000001909712), static_cast(-0.0000001687233), static_cast(-0.0000002392604), + static_cast( 0.0000000154653), static_cast( 0.0000004211339), static_cast( 0.0000007242030), + static_cast( 0.0000007559914), static_cast( 0.0000005232479), static_cast( 0.0000001903915), + static_cast(-0.0000000207916), static_cast( 0.0000000366767), static_cast( 0.0000003353377)}).to_device()); + + ct::Tensor crosstaus = std::move(ct::Tensor( + ct::DataTypeToEnum::value, ct::DeviceTypeToEnum::value, {nrxx * 6})); + crosstaus.zero(); + ct::Tensor expected_crosstaus = crosstaus; + expected_crosstaus.zero(); + + auto cal_stress_mgga_solver = cal_stress_mgga_op::type>(); + cal_stress_mgga_solver(nspin, nrxx, w1, gradwfc.data(), crosstaus.data()); + + EXPECT_EQ(crosstaus, expected_crosstaus); +} + +} // namespace hamilt \ No newline at end of file diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func.h b/source/module_hamilt_pw/hamilt_pwdft/stress_func.h index 6e960727c8..5d51e529e3 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func.h +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func.h @@ -129,7 +129,7 @@ class Stress_Func const Charge* const chr, K_Vectors* p_kv, ModulePW::PW_Basis_K* wfc_basis, - const psi::Psi>* psi_in = nullptr); // gga part in PW basis + const psi::Psi, Device>* psi_in); // gga part in PW basis // 7) the stress from the non-local pseudopotentials void stress_nl(ModuleBase::matrix& sigma, diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_mgga.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_mgga.cpp index 549d0599a9..cac6d8d369 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_mgga.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_mgga.cpp @@ -3,6 +3,9 @@ #include "module_hamilt_pw/hamilt_pwdft/global.h" #include "stress_func.h" +#include +#include + // calculate the Pulay term of mGGA stress correction in PW template void Stress_Func::stress_mgga(ModuleBase::matrix& sigma, @@ -11,7 +14,7 @@ void Stress_Func::stress_mgga(ModuleBase::matrix& sigma, const Charge* const chr, K_Vectors* p_kv, ModulePW::PW_Basis_K* wfc_basis, - const psi::Psi>* psi_in) + const psi::Psi, Device>* psi_in) { ModuleBase::timer::tick("Stress_Func", "stress_mgga"); @@ -22,69 +25,50 @@ void Stress_Func::stress_mgga(ModuleBase::matrix& sigma, const int nrxx = wfc_basis->nrxx; std::complex* psi; - int ipol2xy[3][3]; + int ipol2xy[3][3] = {{0, 1, 3}, {1, 2, 4}, {3, 4, 5}}; FPTYPE sigma_mgga[3][3]; - std::complex* gradwfc = new std::complex[nrxx * 3]; - ModuleBase::GlobalFunc::ZEROS(gradwfc, nrxx * 3); + using ct_Device = typename ct::PsiToContainer::type; - FPTYPE** crosstaus = new FPTYPE*[GlobalV::NSPIN]; - for (int is = 0; is < GlobalV::NSPIN; ++is) - { - crosstaus[is] = new FPTYPE[nrxx * 6]; - ModuleBase::GlobalFunc::ZEROS(crosstaus[is], 6*nrxx); - } + auto gradwfc = ct::Tensor( + ct::DataTypeToEnum>::value, + ct::DeviceTypeToEnum::value, + {nrxx * 3}); + auto crosstaus = ct::Tensor( + ct::DataTypeToEnum::value, + ct::DeviceTypeToEnum::value, + {GlobalV::NSPIN, nrxx * 6}); + crosstaus.zero(); // Must be zeroed out + auto cal_stress_mgga_solver = hamilt::cal_stress_mgga_op, Device>(); for (int ik = 0; ik < p_kv->nks; ik++) { if (GlobalV::NSPIN == 2) current_spin = p_kv->isk[ik]; const int npw = p_kv->ngk[ik]; - psi = new complex[npw]; for (int ibnd = 0; ibnd < GlobalV::NBANDS; ibnd++) { const FPTYPE w1 = wg(ik, ibnd) / GlobalC::ucell.omega; - const std::complex* ppsi = nullptr; - ppsi = &(psi_in[0](ik, ibnd, 0)); - for (int ig = 0; ig < npw; ig++) - { - psi[ig] = ppsi[ig]; - } - XC_Functional::grad_wfc(psi, ik, gradwfc, wfc_basis, GlobalC::ucell.tpiba); - - int ipol = 0; - for (int ix = 0; ix < 3; ix++) - { - for (int iy = 0; iy < ix + 1; iy++) - { - ipol2xy[ix][iy] = ipol; - ipol2xy[iy][ix] = ipol; - for (int ir = 0; ir < nrxx; ir++) - { - crosstaus[current_spin][ipol*nrxx + ir] += 2.0 * w1 - * (gradwfc[ix*nrxx + ir].real() * gradwfc[iy*nrxx + ir].real() - + gradwfc[ix*nrxx + ir].imag() * gradwfc[iy*nrxx + ir].imag()); - } - ipol += 1; - } - } + const std::complex* psi = &psi_in[0](ik, ibnd, 0); + XC_Functional::grad_wfc, Device>(ik, GlobalC::ucell.tpiba, wfc_basis, psi, gradwfc.data>()); + cal_stress_mgga_solver( + current_spin, nrxx, w1, gradwfc.data>(), crosstaus.data()); } // band loop - delete[] psi; + // delete[] psi; } // k loop - + auto crosstaus_host = crosstaus.to_device(); + auto crosstaus_pack = crosstaus_host.accessor(); #ifdef __MPI for (int is = 0; is < GlobalV::NSPIN; ++is) { for (int ipol = 0; ipol < 6; ++ipol) { - chr->reduce_diff_pools(crosstaus[is] + ipol * nrxx); + chr->reduce_diff_pools(&crosstaus_pack[is][ipol * nrxx]); } } #endif - delete[] gradwfc; - for(int ix = 0; ix < 3; ix++) { for(int iy = 0; iy < 3; iy++) @@ -105,19 +89,13 @@ void Stress_Func::stress_mgga(ModuleBase::matrix& sigma, for (int ir = 0; ir < nrxx; ir++) { FPTYPE x - = v_ofk(is, ir) * (chr->kin_r[is][ir] * delta + crosstaus[is][ipol2xy[ix][iy] * nrxx + ir]); + = v_ofk(is, ir) * (chr->kin_r[is][ir] * delta + crosstaus_pack[is][ipol2xy[ix][iy] * nrxx + ir]); sigma_mgga[ix][iy] += x; } } } } - for (int is = 0; is < GlobalV::NSPIN; is++) - { - delete[] crosstaus[is]; - } - delete[] crosstaus; - #ifdef __MPI for (int l = 0; l < 3; l++) { diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_pw.cpp index 5386d11fcd..8e537053fb 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_pw.cpp @@ -85,7 +85,7 @@ void Stress_PW::cal_stress(ModuleBase::matrix& sigmatot, pelec->charge, p_kv, wfc_basis, - psi_in); + d_psi_in); } // local contribution From 8fd89806e7426b3b7147568a8de56f5aeb202467 Mon Sep 17 00:00:00 2001 From: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com> Date: Fri, 12 Jan 2024 17:01:10 +0800 Subject: [PATCH 2/3] add a UnitTest for AutosetMag in read_atom_position() (#3471) --- source/module_cell/test/unitcell_test.cpp | 54 +++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/source/module_cell/test/unitcell_test.cpp b/source/module_cell/test/unitcell_test.cpp index a4a944eb2d..1bc4da54a1 100644 --- a/source/module_cell/test/unitcell_test.cpp +++ b/source/module_cell/test/unitcell_test.cpp @@ -1385,6 +1385,60 @@ TEST_F(UcellTest,ReadAtomPositionsCAU) remove("read_atom_positions.warn"); } +TEST_F(UcellTest,ReadAtomPositionsAutosetMag) +{ + std::string fn = "./support/STRU_MgO"; + std::ifstream ifa(fn.c_str()); + std::ofstream ofs_running; + std::ofstream ofs_warning; + ofs_running.open("read_atom_positions.tmp"); + ofs_warning.open("read_atom_positions.warn"); + //mandatory preliminaries + ucell->ntype = 2; + ucell->atoms = new Atom[ucell->ntype]; + ucell->set_atom_flag = true; + GlobalV::test_pseudo_cell = 2; + GlobalV::BASIS_TYPE = "lcao"; + GlobalV::deepks_setorb = true; + GlobalV::NSPIN = 2; + EXPECT_NO_THROW(ucell->read_atom_species(ifa,ofs_running)); + EXPECT_DOUBLE_EQ(ucell->latvec.e11,4.27957); + EXPECT_DOUBLE_EQ(ucell->latvec.e22,4.27957); + EXPECT_DOUBLE_EQ(ucell->latvec.e33,4.27957); + //mandatory preliminaries + delete[] ucell->magnet.start_magnetization; + ucell->magnet.start_magnetization = new double[ucell->ntype]; + ucell->read_atom_positions(ifa,ofs_running,ofs_warning); + for (int it = 0;it < ucell->ntype; it++) + { + for (int ia = 0;ia < ucell->atoms[it].na; ia++) + { + EXPECT_DOUBLE_EQ(ucell->atoms[it].mag[ia],1.0); + EXPECT_DOUBLE_EQ(ucell->atoms[it].m_loc_[ia].x,1.0); + } + } + //for nspin == 4 + GlobalV::NSPIN = 4; + delete[] ucell->magnet.start_magnetization; + ucell->magnet.start_magnetization = new double[ucell->ntype]; + ucell->read_atom_positions(ifa,ofs_running,ofs_warning); + for (int it = 0;it < ucell->ntype; it++) + { + for (int ia = 0;ia < ucell->atoms[it].na; ia++) + { + EXPECT_DOUBLE_EQ(ucell->atoms[it].mag[ia],sqrt(pow(1.0,2)+pow(1.0,2)+pow(1.0,2))); + EXPECT_DOUBLE_EQ(ucell->atoms[it].m_loc_[ia].x,1.0); + EXPECT_DOUBLE_EQ(ucell->atoms[it].m_loc_[ia].y,1.0); + EXPECT_DOUBLE_EQ(ucell->atoms[it].m_loc_[ia].z,1.0); + } + } + ofs_running.close(); + ofs_warning.close(); + ifa.close(); + remove("read_atom_positions.tmp"); + remove("read_atom_positions.warn"); +} + TEST_F(UcellTest,ReadAtomPositionsWarning1) { std::string fn = "./support/STRU_MgO_WarningC1"; From 8a969c746e9c39048de7d2db760993713f90ecef Mon Sep 17 00:00:00 2001 From: Jie Li <76780849+jieli-matrix@users.noreply.github.com> Date: Fri, 12 Jan 2024 17:01:47 +0800 Subject: [PATCH 3/3] add precision tests for zeros and derivatives (#3445) --- source/module_base/test/math_sphbes_test.cpp | 410 +++++++++++-------- 1 file changed, 248 insertions(+), 162 deletions(-) diff --git a/source/module_base/test/math_sphbes_test.cpp b/source/module_base/test/math_sphbes_test.cpp index b79e3f8da7..521d4dc2f4 100644 --- a/source/module_base/test/math_sphbes_test.cpp +++ b/source/module_base/test/math_sphbes_test.cpp @@ -1,27 +1,28 @@ -#include"../math_sphbes.h" -#include -#include +#include "../math_sphbes.h" + +#include +#include #ifdef __MPI -#include"mpi.h" +#include "mpi.h" #endif -#include"gtest/gtest.h" +#include "gtest/gtest.h" #define doublethreshold 1e-7 #define MAX(x, y) ((x) > (y) ? (x) : (y)) /************************************************ -* unit test of class Integral -***********************************************/ + * unit test of class Integral + ***********************************************/ /** * Note: this unit test try to ensure the invariance * of the spherical Bessel produced by class Sphbes, - * and the reference results are produced by ABACUS + * and the reference results are produced by ABACUS * at 2022-1-27. - * - * Tested function: + * + * Tested function: * - Spherical_Bessel. * - Spherical_Bessel_Roots * - overloading of Spherical_Bessel. This funnction sets sjp[i] to 1.0 when i < msh. @@ -32,173 +33,175 @@ double mean(const double* vect, const int totN) { double meanv = 0.0; - for (int i=0; i< totN; ++i) {meanv += vect[i]/totN;} + for (int i = 0; i < totN; ++i) + { + meanv += vect[i] / totN; + } return meanv; } class Sphbes : public testing::Test { - protected: - - int msh = 700; - int l0 = 0; - int l1 = 1; - int l2 = 2; - int l3 = 3; - int l4 = 4; - int l5 = 5; - int l6 = 6; - int l7 = 7; - double q = 1.0; - double *r = new double[msh]; - double *jl = new double[msh]; - double *djl = new double[msh]; + protected: + int msh = 700; + int l0 = 0; + int l1 = 1; + int l2 = 2; + int l3 = 3; + int l4 = 4; + int l5 = 5; + int l6 = 6; + int l7 = 7; + double q = 1.0; + double* r = new double[msh]; + double* jl = new double[msh]; + double* djl = new double[msh]; void SetUp() { - for(int i=0; i(&y), sizeof(double))){ - Y[i] = y; ++i; + while (fin.read(reinterpret_cast(&y), sizeof(double))) + { + Y[i] = y; + ++i; } fin.close(); - for(int i = 0; i < nr; ++i){ + for (int i = 0; i < nr; ++i) + { r[i] = (i + 1) * dr; } // test for new sphbesj for (int l = l_lo; l <= l_hi; ++l) { - for(int i = 0; i< nr; ++i){ + for (int i = 0; i < nr; ++i) + { EXPECT_NEAR(ModuleBase::Sphbes::sphbesj(l, r[i] * q), Y[l * nr + i], 1e-12); } - } + } // test for old Bessel - // most of l cases precision failed to achieve 1e-12 + // most of l cases precision failed to achieve 1e-12 double* jl_old = new double[nr + 10]; for (int l = l_lo; l <= l_hi; ++l) { ModuleBase::Sphbes::Spherical_Bessel(nr, r, q, l, jl_old); double errs = 0.0; - for(int i = 0; i< nr; ++i){ + for (int i = 0; i < nr; ++i) + { errs += MAX(jl_old[i] - Y[l * nr + i], Y[l * nr + i] - jl_old[i]); } // std::cout << " l = " << l << ", errors: " << std::scientific << errs / nr << std::endl; @@ -313,20 +321,24 @@ TEST_F(Sphbes, SphericalBesselPrecisionNearZero) double y; std::ifstream fin("data/bjxo.bin", std::ios::binary); int i = 0; - while (fin.read(reinterpret_cast(&y), sizeof(double))){ - Y[i] = y; ++i; + while (fin.read(reinterpret_cast(&y), sizeof(double))) + { + Y[i] = y; + ++i; } fin.close(); // generate x x[0] = 1.0 / (1 << 5); - for(int i = 1; i < n; i++){ - x[i] = x[i-1] / 2; + for (int i = 1; i < n; i++) + { + x[i] = x[i - 1] / 2; } // test for sphbesj near zero for (int l = l_lo; l <= l_hi; ++l) { - for(int i = 0; i< n; ++i){ + for (int i = 0; i < n; ++i) + { EXPECT_NEAR(ModuleBase::Sphbes::sphbesj(l, x[i]), Y[l * n + i], 1e-12); } } @@ -351,29 +363,103 @@ TEST_F(Sphbes, Zeros) } } -int main(int argc, char **argv) +TEST_F(Sphbes, ZerosOld) +{ + // This test checks whether Spherical_Bessel_Roots properly computes the zeros of sphbesj. + + int lmax = 7; + int nzeros = 50; + double* zeros = new double[nzeros]; + for (int l = 0; l <= lmax; ++l) + { + ModuleBase::Sphbes::Spherical_Bessel_Roots(nzeros, l, 1e-7, zeros, 1.0); + for (int i = 0; i < nzeros; ++i) + { + EXPECT_LT(std::abs(ModuleBase::Sphbes::sphbesj(l, zeros[i])), 1e-7); + } + } +} + +TEST_F(Sphbes, Derivatives) +{ + int lmax = 20; + int numr = 20; + double* r = new double[numr]; + double* djl = new double[numr]; + double q = 0.001; + r[0] = 1.0; + for (int i = 0; i < numr; ++i) + { + r[i + 1] = r[i] * 2.0; + } + + for (int l = 0; l <= lmax; ++l) + { + ModuleBase::Sphbes::dsphbesj(numr, r, q, l, djl); + for (int i = 0; i < numr; ++i) + { + double h = 1e-8; + EXPECT_LT( + abs(djl[i] * 2 * h + - (ModuleBase::Sphbes::sphbesj(l, q * r[i] + h) - ModuleBase::Sphbes::sphbesj(l, q * r[i] - h))), + 1e-14); + } + } +} + +TEST_F(Sphbes, DerivativesOld) +{ + int lmax = 20; + int numr = 20; + double* r = new double[numr]; + double* djl = new double[numr]; + double q = 0.001; + r[0] = 1.0; + for (int i = 0; i < numr; ++i) + { + r[i + 1] = r[i] * 2.0; + } + + for (int l = 0; l < lmax; l++) + { + ModuleBase::Sphbes::dSpherical_Bessel_dx(numr, r, q, l, djl); + for (int i = 0; i < numr; i++) + { + double h = 1e-8; + double errs + = abs(djl[i] * 2 * h + - (ModuleBase::Sphbes::sphbesj(l, q * r[i] + h) - ModuleBase::Sphbes::sphbesj(l, q * r[i] - h))); + if (errs > 1e-14) + { + std::cout << "l = " << l << ", r = " << r[i] << ", errs = " << errs << std::endl; + } + } + } +} + +int main(int argc, char** argv) { #ifdef __MPI - MPI_Init(&argc, &argv); + MPI_Init(&argc, &argv); #endif - testing::InitGoogleTest(&argc, argv); + testing::InitGoogleTest(&argc, argv); int result = RUN_ALL_TESTS(); #ifdef __MPI - MPI_Finalize(); + MPI_Finalize(); #endif - return result; + return result; } -TEST_F(Sphbes,SphericalBesselsjp) +TEST_F(Sphbes, SphericalBesselsjp) { int iii; - double *sjp = new double[msh]; - ModuleBase::Sphbes::Spherical_Bessel(msh,r,q,l0,jl,sjp); - EXPECT_NEAR(mean(jl,msh)/0.2084468748396,1.0,doublethreshold); - for(int iii = 0 ; iii