From fbb6f2cccd2643405dc8ce7bed57582235f9c4b8 Mon Sep 17 00:00:00 2001 From: Zhao Tianqi Date: Mon, 22 Apr 2024 10:23:02 +0800 Subject: [PATCH 01/18] Feature: output mag force in spin constraint calculation (#4037) * Feature: output mag force in spin constraint calculation * add ut for print_Mag_Force --- source/module_esolver/esolver_ks_lcao.cpp | 1 + .../module_deltaspin/lambda_loop_helper.cpp | 4 +- .../module_deltaspin/spin_constrain.cpp | 53 +++++++++++++++++-- .../module_deltaspin/spin_constrain.h | 3 ++ .../test/cal_mw_helper_test.cpp | 6 ++- .../test/lambda_loop_helper_test.cpp | 5 ++ .../test/spin_constrain_test.cpp | 6 ++- 7 files changed, 69 insertions(+), 9 deletions(-) diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp index d142ae8e24..949e6c4ba7 100644 --- a/source/module_esolver/esolver_ks_lcao.cpp +++ b/source/module_esolver/esolver_ks_lcao.cpp @@ -1131,6 +1131,7 @@ void ESolver_KS_LCAO::after_scf(const int istep) { SpinConstrain& sc = SpinConstrain::getScInstance(); sc.cal_MW(istep, &(this->LM), true); + sc.print_Mag_Force(); } if (!GlobalV::CAL_FORCE && !GlobalV::CAL_STRESS) diff --git a/source/module_hamilt_lcao/module_deltaspin/lambda_loop_helper.cpp b/source/module_hamilt_lcao/module_deltaspin/lambda_loop_helper.cpp index dcebe43f7f..38a53b3e68 100644 --- a/source/module_hamilt_lcao/module_deltaspin/lambda_loop_helper.cpp +++ b/source/module_hamilt_lcao/module_deltaspin/lambda_loop_helper.cpp @@ -4,8 +4,8 @@ template <> void SpinConstrain, psi::DEVICE_CPU>::print_termination() { - print_2d("after-optimization spin: (print in the inner loop): ", this->Mi_, this->nspin_); - print_2d("after-optimization lambda: (print in the inner loop): ", this->lambda_, this->nspin_); + print_2d("after-optimization spin (uB): (print in the inner loop): ", this->Mi_, this->nspin_); + print_2d("after-optimization lambda (Ry/uB): (print in the inner loop): ", this->lambda_, this->nspin_); std::cout << "Inner optimization for lambda ends." << std::endl; std::cout << "===============================================================================" << std::endl; } diff --git a/source/module_hamilt_lcao/module_deltaspin/spin_constrain.cpp b/source/module_hamilt_lcao/module_deltaspin/spin_constrain.cpp index 8fce1484fc..e2c73335c3 100644 --- a/source/module_hamilt_lcao/module_deltaspin/spin_constrain.cpp +++ b/source/module_hamilt_lcao/module_deltaspin/spin_constrain.cpp @@ -1,4 +1,5 @@ #include "spin_constrain.h" +#include "module_base/formatter_fmt.h" #include @@ -541,20 +542,66 @@ void SpinConstrain::print_Mi(bool print) int nat = this->get_nat(); if (print) { + formatter::Fmt fmt; + fmt.set_width(20); + fmt.set_precision(10); + fmt.set_fillChar(' '); + fmt.set_fixed(false); + fmt.set_right(true); + fmt.set_error(false); + std::cout << "Total Magnetism (uB): " << std::endl; for (int iat = 0; iat < nat; ++iat) { if (this->nspin_ == 2) { - std::cout << "Total Magnetism on atom: " << iat << " " << std::setprecision(10) << " (" << Mi_[iat].z << ")" << std::endl; + std::cout << "ATOM " << std::left << std::setw(6) << iat << fmt.format(Mi_[iat].z) << std::endl; } else if (this->nspin_ ==4) { - std::cout << "Total Magnetism on atom: " << iat << " " << std::setprecision(10) << " (" << Mi_[iat].x - << ", " << Mi_[iat].y << ", " << Mi_[iat].z << ")" << std::endl; + std::cout << "ATOM " << std::left << std::setw(6) << iat << fmt.format(Mi_[iat].x) << fmt.format(Mi_[iat].y) << fmt.format(Mi_[iat].z) << std::endl; } } } } +/// print magnetic force (defined as \frac{\delta{L}}/{\delta{Mi}} = -lambda[iat]) +template +void SpinConstrain::print_Mag_Force() +{ + this->check_atomCounts(); + int nat = this->get_nat(); + formatter::Fmt fmt; + fmt.set_width(20); + fmt.set_precision(10); + fmt.set_fillChar(' '); + fmt.set_fixed(false); + fmt.set_right(true); + fmt.set_error(false); + std::cout << "Final optimal lambda (Ry/uB): " << std::endl; + for (int iat = 0; iat < nat; ++iat) + { + if (this->nspin_ == 2) + { + std::cout << "ATOM " << std::left << std::setw(6) << iat << fmt.format(lambda_[iat].z) << std::endl; + } + else if (this->nspin_ ==4) + { + std::cout << "ATOM " << std::left << std::setw(6) << iat << fmt.format(lambda_[iat].x) << fmt.format(lambda_[iat].y) << fmt.format(lambda_[iat].z) << std::endl; + } + } + std::cout << "Magnetic force (Ry/uB): " << std::endl; + for (int iat = 0; iat < nat; ++iat) + { + if (this->nspin_ == 2) + { + std::cout << "ATOM " << std::left << std::setw(6) << iat << fmt.format(-lambda_[iat].z) << std::endl; + } + else if (this->nspin_ ==4) + { + std::cout << "ATOM " << std::left << std::setw(6) << iat << fmt.format(-lambda_[iat].x) << fmt.format(-lambda_[iat].y) << fmt.format(-lambda_[iat].z) << std::endl; + } + } +} + template class SpinConstrain, psi::DEVICE_CPU>; template class SpinConstrain; \ No newline at end of file diff --git a/source/module_hamilt_lcao/module_deltaspin/spin_constrain.h b/source/module_hamilt_lcao/module_deltaspin/spin_constrain.h index 3ec76ac04f..a966467586 100644 --- a/source/module_hamilt_lcao/module_deltaspin/spin_constrain.h +++ b/source/module_hamilt_lcao/module_deltaspin/spin_constrain.h @@ -87,6 +87,9 @@ class SpinConstrain /// print mi void print_Mi(bool print = false); + /// print magnetic force, defined as \frac{\delta{L}}/{\delta{Mi}} = -lambda[iat]) + void print_Mag_Force(); + /// collect_mw from matrix multiplication result void collect_MW(ModuleBase::matrix& MecMulP, const ModuleBase::ComplexMatrix& mud, int nw, int isk); diff --git a/source/module_hamilt_lcao/module_deltaspin/test/cal_mw_helper_test.cpp b/source/module_hamilt_lcao/module_deltaspin/test/cal_mw_helper_test.cpp index 628cc4f2c1..4c8bb14dac 100644 --- a/source/module_hamilt_lcao/module_deltaspin/test/cal_mw_helper_test.cpp +++ b/source/module_hamilt_lcao/module_deltaspin/test/cal_mw_helper_test.cpp @@ -66,7 +66,8 @@ TEST_F(SpinConstrainTest, CalculateMW) testing::internal::CaptureStdout(); sc.print_Mi(true); std::string output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("Total Magnetism on atom: 0 (2, 3, 4)")); + EXPECT_THAT(output, testing::HasSubstr("Total Magnetism (uB):")); + EXPECT_THAT(output, testing::HasSubstr("ATOM 0 2.0000000000e+00 3.0000000000e+00 4.0000000000e+00")); } TEST_F(SpinConstrainTest, CollectMW) @@ -141,7 +142,8 @@ TEST_F(SpinConstrainTest, CalculateMWS2) testing::internal::CaptureStdout(); sc.print_Mi(true); std::string output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("Total Magnetism on atom: 0 (-1)")); + EXPECT_THAT(output, testing::HasSubstr("Total Magnetism (uB):")); + EXPECT_THAT(output, testing::HasSubstr("ATOM 0 -1.0000000000e+00")); } TEST_F(SpinConstrainTest, CollectMWS2) diff --git a/source/module_hamilt_lcao/module_deltaspin/test/lambda_loop_helper_test.cpp b/source/module_hamilt_lcao/module_deltaspin/test/lambda_loop_helper_test.cpp index d5d4e42144..a7183263fe 100644 --- a/source/module_hamilt_lcao/module_deltaspin/test/lambda_loop_helper_test.cpp +++ b/source/module_hamilt_lcao/module_deltaspin/test/lambda_loop_helper_test.cpp @@ -40,10 +40,15 @@ TEST_F(SpinConstrainTest, PrintTermination) sc.set_sc_lambda(sc_lambda.data(), 1); testing::internal::CaptureStdout(); sc.print_termination(); + sc.print_Mag_Force(); std::string output = testing::internal::GetCapturedStdout(); EXPECT_THAT(output, testing::HasSubstr("Inner optimization for lambda ends.")); EXPECT_THAT(output, testing::HasSubstr("ATOM 1 0.0000000000e+00 0.0000000000e+00 0.0000000000e+00")); EXPECT_THAT(output, testing::HasSubstr("ATOM 1 1.0000000000e+00 2.0000000000e+00 3.0000000000e+00")); + EXPECT_THAT(output, testing::HasSubstr("Final optimal lambda (Ry/uB):")); + EXPECT_THAT(output, testing::HasSubstr("ATOM 1 1.0000000000e+00 2.0000000000e+00 3.0000000000e+00")); + EXPECT_THAT(output, testing::HasSubstr("Magnetic force (Ry/uB):")); + EXPECT_THAT(output, testing::HasSubstr("ATOM 0 -1.0000000000e+00 -2.0000000000e+00 -3.0000000000e+00")); } TEST_F(SpinConstrainTest, CheckRmsStop) diff --git a/source/module_hamilt_lcao/module_deltaspin/test/spin_constrain_test.cpp b/source/module_hamilt_lcao/module_deltaspin/test/spin_constrain_test.cpp index e23add1002..cd1f7c2956 100644 --- a/source/module_hamilt_lcao/module_deltaspin/test/spin_constrain_test.cpp +++ b/source/module_hamilt_lcao/module_deltaspin/test/spin_constrain_test.cpp @@ -398,10 +398,12 @@ TYPED_TEST(SpinConstrainTest, PrintMi) testing::internal::CaptureStdout(); this->sc.print_Mi(true); std::string output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("Total Magnetism on atom: 0 (0, 0, 0)")); + EXPECT_THAT(output, testing::HasSubstr("Total Magnetism (uB):")); + EXPECT_THAT(output, testing::HasSubstr("ATOM 0 0.0000000000e+00 0.0000000000e+00 0.0000000000e+00")); this->sc.set_nspin(2); testing::internal::CaptureStdout(); this->sc.print_Mi(true); output = testing::internal::GetCapturedStdout(); - EXPECT_THAT(output, testing::HasSubstr("Total Magnetism on atom: 0 (0)")); + EXPECT_THAT(output, testing::HasSubstr("Total Magnetism (uB):")); + EXPECT_THAT(output, testing::HasSubstr("ATOM 0 0.0000000000e+00")); } \ No newline at end of file From 6eb39e8431adb480c58f10ca71b5425a73998d28 Mon Sep 17 00:00:00 2001 From: Chun Cai Date: Mon, 22 Apr 2024 10:28:55 +0800 Subject: [PATCH 02/18] CI: update cuda tests (#4032) Co-authored-by: Haozhi Han --- .github/workflows/cuda.yml | 81 ++++---------------------------------- 1 file changed, 7 insertions(+), 74 deletions(-) diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 2832a3e02a..6df2577bb8 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -4,88 +4,21 @@ on: workflow_dispatch: jobs: - start-runner: - name: Start self-hosted EC2 runner - runs-on: ubuntu-latest - outputs: - label: ${{ steps.start-ec2-runner.outputs.label }} - ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: us-east-2 - - name: Start EC2 runner - id: start-ec2-runner - uses: machulav/ec2-github-runner@v2 - with: - mode: start - github-token: ${{ secrets.PAT }} - ec2-image-id: ami-04cd9fec4a7a39019 - ec2-instance-type: g4dn.xlarge - subnet-id: subnet-72d3e53e - security-group-id: sg-06b0c93122c08aeab - test: - name: Do the job on the runner - needs: start-runner # required to start the main job when the runner is ready - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + name: Test on CUDA Build + runs-on: nvidia container: image: ghcr.io/deepmodeling/abacus-cuda options: --gpus all steps: - name: Checkout uses: actions/checkout@v4 - - name: Build cuSolver + with: + submodules: recursive + - name: Build run: | nvidia-smi - cmake -B build -DUSE_CUSOLVER_LCAO=ON + cmake -B build -DUSE_CUDA=ON -DBUILD_TESTING=ON cmake --build build -j4 cmake --install build - cmake -B build -DBUILD_TESTING=ON - cmake --build build -j4 --target hsolver_diago - - name: Test e2e - run: | - export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64 - cd tests/integrate - echo "ks_solver cusolver" >> ./270_NO_MD_2O/INPUT - ./Autotest.sh -r 270_NO_MD_2O - - name: Test UT - run: | - cd source/src_pdiag/test/ - cp ../../../build/source/src_pdiag/test/hsolver_diago . - ./hsolver_diago - bash diago_parallel_test.sh - - name: Test performance - run: | - cd examples/performance - ls -d P1*lcao* > allcase - sed -i '/ks_solver/d' P1*lcao*/INPUT - sed -i '$a ks_solver cusolver' P1*lcao*/INPUT - bash run.sh - cat sumall.dat - - - stop-runner: - name: Stop self-hosted EC2 runner - needs: - - start-runner # required to get output from the start-runner job - - test # required to wait when the main job is done - runs-on: ubuntu-latest - if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: us-east-2 - - name: Stop EC2 runner - uses: machulav/ec2-github-runner@v2 - with: - mode: stop - github-token: ${{ secrets.PAT }} - label: ${{ needs.start-runner.outputs.label }} - ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} + # TODO: add tests From f709ba050552ebf03a3bcddf4361da5eec581630 Mon Sep 17 00:00:00 2001 From: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com> Date: Mon, 22 Apr 2024 13:27:22 +0800 Subject: [PATCH 03/18] add some docs (#4039) --- docs/advanced/scf/converge.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/advanced/scf/converge.md b/docs/advanced/scf/converge.md index f74da82522..4f77f2f3b7 100644 --- a/docs/advanced/scf/converge.md +++ b/docs/advanced/scf/converge.md @@ -10,6 +10,8 @@ For each of the mixing types, we also provide variables for controlling relevant `mixing_ndim` is the mixing dimensions in DIIS (broyden or pulay) mixing. Gerenally, a larger `mixing_ndim` leads to a better convergence. the default choice `mixing_ndim=8` should work fine in most cases. For `mixing_type`, the default choice is `broyden`, which is slightly better than `Pulay` typically. Besides that, a large `mixing_beta` means a larger change in electron density for each SCF step. For well-behaved systems, a larger `mixing_beta` leads to faster convergence. However, for some difficult cases, a smaller `mixing_beta` is preferred to avoid numerical instabilities. +For most isolated systems, Kerker preconditioning is unnecessary. You can turn off it by setting `mixing_gg0 0.0` to get a faster convergence. + For non-spin-polarized calculations, the default choices usually achieve convergence. If convergence issue arises in metallic systems, you can try different value of Kerker preconditioning [mixing_gg0](../input_files/input-main.md#mixing_gg0) and [mixing_gg0_min](../input_files/input-main.md#mixing_gg0_min), and try to reduce `mixing_beta`, which is 0.8 defaultly for `nspin=1`. For magnetic calculations, `mixing_beta_mag` and `mixing_gg0_mag` are activated. Considering collinear calculations, you can rely on the default value for most cases. If convergence issue arises, you can try to reduce `mixing_beta` and `mixing_beta_mag` together. For non-collinear calculations, tradtional broyden usually works, especially for a given magnetic configuration. If one is not interested in the energies of a given magnetic configuration but wants to determine the ground state by relaxing the magnetic moments’ directions, the standard Broyden mixing algorithm sometimes fails to find the correct magnetic configuration. If so, we can set [mixing_angle=1.0](../input_files/input-main.md#mixing_angle), which is a promising mixing method proposed by J. Phys. Soc. Jpn. 82 (2013) 114706. From 25244770a1a8ea0cd999e9b99b084f8262cefa02 Mon Sep 17 00:00:00 2001 From: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com> Date: Tue, 23 Apr 2024 14:34:02 +0800 Subject: [PATCH 04/18] fix final mag in STRU_ION_D (#4044) --- source/module_cell/read_atoms.cpp | 30 +++++++++++++++++----------- source/module_cell/unitcell.h | 2 +- source/module_io/mulliken_charge.cpp | 16 +++++++++++++++ 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/source/module_cell/read_atoms.cpp b/source/module_cell/read_atoms.cpp index c5836b8aea..acf9a42600 100644 --- a/source/module_cell/read_atoms.cpp +++ b/source/module_cell/read_atoms.cpp @@ -1003,6 +1003,7 @@ void UnitCell::print_stru_file(const std::string &fn, const int &type, const int if(type==1) { + int nat_tmp = 0; ofs << "Cartesian" << std::endl; for(int it=0; it b); - double *atom_mag; + std::vector> atom_mulliken; //[nat][nspin] int n_mag_at; std::string& Coordinate = lat.Coordinate; diff --git a/source/module_io/mulliken_charge.cpp b/source/module_io/mulliken_charge.cpp index 72b0046cc3..60983030cb 100644 --- a/source/module_io/mulliken_charge.cpp +++ b/source/module_io/mulliken_charge.cpp @@ -312,6 +312,16 @@ void ModuleIO::out_mulliken(const int& step, LCAO_Matrix* LM, const elecstate::E os << "Decomposed Mulliken populations" << std::endl; GlobalV::ofs_running << std::endl < Date: Wed, 24 Apr 2024 12:32:28 +0800 Subject: [PATCH 05/18] Test: case test for noncolin force (#4048) --- tests/integrate/204_NO_KP_NC/INPUT | 2 + tests/integrate/204_NO_KP_NC/STRU | 2 +- tests/integrate/204_NO_KP_NC/result.ref | 8 +- .../204_NO_KP_NC_deltaspin/mulliken.txt.ref | 168 +++++++++--------- .../204_NO_KP_NC_deltaspin/result.ref | 7 +- 5 files changed, 96 insertions(+), 91 deletions(-) diff --git a/tests/integrate/204_NO_KP_NC/INPUT b/tests/integrate/204_NO_KP_NC/INPUT index 8886378fdf..4bb8c943c4 100644 --- a/tests/integrate/204_NO_KP_NC/INPUT +++ b/tests/integrate/204_NO_KP_NC/INPUT @@ -23,3 +23,5 @@ smearing_sigma 0.002 #Parameters (5.Mixing) + +cal_force 1 diff --git a/tests/integrate/204_NO_KP_NC/STRU b/tests/integrate/204_NO_KP_NC/STRU index 73cf74c4f9..cd92e43261 100644 --- a/tests/integrate/204_NO_KP_NC/STRU +++ b/tests/integrate/204_NO_KP_NC/STRU @@ -18,5 +18,5 @@ Direct Si // Element type 0.0 // magnetism 2 -0.00 0.00 0.00 1 1 1 mag 1.0 angle1 90 angle2 0 +0.10 0.00 0.00 1 1 1 mag 1.0 angle1 90 angle2 0 0.25 0.25 0.25 1 1 1 mag 1.0 angle1 90 angle2 180 diff --git a/tests/integrate/204_NO_KP_NC/result.ref b/tests/integrate/204_NO_KP_NC/result.ref index e0ce28aca8..d1c497de0c 100644 --- a/tests/integrate/204_NO_KP_NC/result.ref +++ b/tests/integrate/204_NO_KP_NC/result.ref @@ -1,3 +1,5 @@ -etotref -211.1858438233880 -etotperatomref -105.5929219117 -totaltimeref 1.3488 +etotref -210.3233812890469 +etotperatomref -105.1616906445 +totalforceref 17.244244 +totaltimeref 28.3911 +23.58 diff --git a/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref b/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref index bffad6b08a..168c5723a3 100644 --- a/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref +++ b/tests/integrate/204_NO_KP_NC_deltaspin/mulliken.txt.ref @@ -3,92 +3,92 @@ CALCULATE THE MULLIkEN ANALYSIS FOR EACH ATOM Total charge: 32 Decomposed Mulliken populations 0 Zeta of Fe Spin 1 Spin 2 Spin 3 Spin 4 -s 0 1.317 0.06196 -0.2625 -0.07949 - sum over m 1.317 0.06196 -0.2625 -0.07949 -s 1 1.726 -0.01809 0.09886 -0.01413 - sum over m 1.726 -0.01809 0.09886 -0.01413 -s 2 0.03246 -0.04153 0.2209 -0.02228 - sum over m 0.03246 -0.04153 0.2209 -0.02228 -s 3 -0.02921 0.005609 -0.025 -0.005114 - sum over m -0.02921 0.005609 -0.025 -0.005114 - sum over m+zeta 3.046 0.007945 0.0323 -0.121 -pz 0 2.034 -0.001186 0.005932 -3.981e-06 -px 0 2.033 -0.001283 0.006419 -3.989e-06 -py 0 2.033 -0.001188 0.005944 -3.979e-06 - sum over m 6.1 -0.003658 0.0183 -1.195e-05 -pz 1 -0.02621 0.0005578 -0.002789 0 -px 1 -0.02639 0.0006107 -0.003054 0 -py 1 -0.02603 0.0005536 -0.002768 0 - sum over m -0.07863 0.001722 -0.008611 0 - sum over m+zeta 6.021 -0.001936 0.009684 -1.277e-05 -dz^2 0 1.964 0.0008269 -0.004128 -1.088e-05 -dxz 0 1.044 0.156 -0.7849 -0.0003055 -dyz 0 0.9592 0.1564 -0.7869 -0.0003096 -dx^2-y^2 0 1.967 0.000752 -0.003754 -1.059e-05 -dxy 0 1.055 0.1558 -0.7835 -0.0003047 - sum over m 6.988 0.4698 -2.363 -0.0009413 -dz^2 1 0.03863 -0.0008716 0.004365 -1.357e-05 -dxz 1 -0.03708 -0.004148 0.02101 1.956e-05 -dyz 1 -0.03373 -0.004494 0.02274 1.968e-05 -dx^2-y^2 1 0.03943 -0.0009117 0.004566 -1.471e-05 -dxy 1 -0.03733 -0.004056 0.02055 1.945e-05 - sum over m -0.03008 -0.01448 0.07324 3.041e-05 - sum over m+zeta 6.958 0.4553 -2.29 -0.0009109 -fz^3 0 -0.007044 0.0007552 -0.003776 -1.406e-06 -fxz^2 0 -0.002046 0.0002628 -0.001314 0 -fyz^2 0 -0.00273 0.00029 -0.00145 0 -fzx^2-zy^2 0 5.811e-05 0 3.451e-06 0 -fxyz 0 1.14e-05 1.249e-06 -6.306e-06 0 -fx^3-3*xy^2 0 -0.003379 0.0004381 -0.00219 0 -f3yx^2-y^3 0 -0.00407 0.0004626 -0.002313 0 - sum over m -0.0192 0.002209 -0.01105 -4.307e-06 - sum over m+zeta -0.0192 0.002209 -0.01105 -4.307e-06 -Total Charge on atom: Fe 16.01 -Total Magnetism on atom: Fe (0.4635, -2.259, -0.1219) +s 0 1.317 0.05552 0.2843 0.02903 + sum over m 1.317 0.05552 0.2843 0.02903 +s 1 1.726 -0.01923 -0.09498 0.005159 + sum over m 1.726 -0.01923 -0.09498 0.005159 +s 2 0.03246 -0.04333 -0.2148 0.008137 + sum over m 0.03246 -0.04333 -0.2148 0.008137 +s 3 -0.02921 0.005194 0.02641 0.001867 + sum over m -0.02921 0.005194 0.02641 0.001867 + sum over m+zeta 3.046 -0.001842 0.0009368 0.04419 +pz 0 2.034 -0.001185 -0.005932 1.545e-06 +px 0 2.033 -0.001283 -0.006419 1.538e-06 +py 0 2.033 -0.001188 -0.005944 1.543e-06 + sum over m 6.1 -0.003656 -0.01829 4.626e-06 +pz 1 -0.02622 0.0005602 0.002791 0 +px 1 -0.02639 0.0006145 0.003054 0 +py 1 -0.02603 0.0005563 0.00277 0 + sum over m -0.07864 0.001731 0.008615 0 + sum over m+zeta 6.021 -0.001925 -0.00968 5.611e-06 +dz^2 0 1.964 0.0008273 0.004131 4.077e-06 +dxz 0 1.044 0.1755 0.7507 0.002258 +dyz 0 0.9544 0.1768 0.7532 0.002329 +dx^2-y^2 0 1.967 0.0007523 0.003756 3.978e-06 +dxy 0 1.055 0.1751 0.7495 0.002251 + sum over m 6.984 0.529 2.261 0.006846 +dz^2 1 0.03863 -0.0008699 -0.004363 5.197e-06 +dxz 1 -0.03759 -0.005346 -0.01936 -0.0001322 +dyz 1 -0.03407 -0.005734 -0.02118 -0.0001342 +dx^2-y^2 1 0.03943 -0.0009093 -0.004564 5.691e-06 +dxy 1 -0.03787 -0.005246 -0.0189 -0.0001314 + sum over m -0.03146 -0.01811 -0.06836 -0.000387 + sum over m+zeta 6.952 0.5109 2.193 0.006459 +fz^3 0 -0.007049 0.0007578 0.003775 0 +fxz^2 0 -0.002045 0.0002638 0.001312 0 +fyz^2 0 -0.002729 0.0002912 0.001448 0 +fzx^2-zy^2 0 6.273e-05 0 -6.642e-06 0 +fxyz 0 1.153e-05 1.446e-06 5.675e-06 0 +fx^3-3*xy^2 0 -0.00338 0.00044 0.002189 0 +f3yx^2-y^3 0 -0.00407 0.0004646 0.002311 0 + sum over m -0.0192 0.002219 0.01103 2.581e-06 + sum over m+zeta -0.0192 0.002219 0.01103 2.581e-06 +Total Charge on atom: Fe 16 +Total Magnetism on atom: Fe (0.5093, 2.195, 0.05066) 1 Zeta of Fe Spin 1 Spin 2 Spin 3 Spin 4 -s 0 1.275 0.04699 -0.2823 0.07949 - sum over m 1.275 0.04699 -0.2823 0.07949 -s 1 1.755 -0.01866 0.08491 0.01412 - sum over m 1.755 -0.01866 0.08491 0.01412 -s 2 -0.02899 -0.04221 0.1978 0.02226 - sum over m -0.02899 -0.04221 0.1978 0.02226 -s 3 -0.04712 0.00595 -0.03281 0.005133 - sum over m -0.04712 0.00595 -0.03281 0.005133 - sum over m+zeta 2.954 -0.007928 -0.03239 0.121 -pz 0 2.032 -0.001371 0.00685 3.967e-06 -px 0 2.025 -0.0009218 0.004606 3.958e-06 -py 0 2.032 -0.001333 0.006664 3.965e-06 - sum over m 6.089 -0.003626 0.01812 1.189e-05 -pz 1 -0.02529 0.0005803 -0.002904 0 -px 1 -0.01606 0.0001295 -0.0006492 0 -py 1 -0.02466 0.0005625 -0.002815 0 - sum over m -0.06602 0.001272 -0.006367 0 - sum over m+zeta 6.023 -0.002353 0.01175 1.25e-05 -dz^2 0 1.957 0.001154 -0.005778 1.149e-05 -dxz 0 1.091 0.1517 -0.7637 -8.462e-05 -dyz 0 0.9556 0.1553 -0.7815 -8.443e-05 -dx^2-y^2 0 1.947 0.001648 -0.008249 1.233e-05 -dxy 0 1.106 0.1508 -0.7591 -8.432e-05 - sum over m 7.056 0.4606 -2.318 -0.0002295 -dz^2 1 0.03925 -0.001067 0.005328 1.289e-05 -dxz 1 -0.03558 -0.002824 0.01439 2.5e-06 -dyz 1 -0.03117 -0.003962 0.0201 2.798e-06 -dx^2-y^2 1 0.04266 -0.001401 0.006997 1.29e-05 -dxy 1 -0.03637 -0.002747 0.01401 2.475e-06 - sum over m -0.02122 -0.012 0.06082 3.356e-05 - sum over m+zeta 7.035 0.4486 -2.257 -0.000196 -fz^3 0 -0.006615 0.0007206 -0.003605 1.352e-06 -fxz^2 0 -0.001955 0.0002554 -0.001278 0 -fyz^2 0 -0.002684 0.0002735 -0.001368 0 -fzx^2-zy^2 0 9.383e-05 1.68e-05 -8.473e-05 0 -fxyz 0 2.053e-05 3.66e-06 -1.839e-05 0 -fx^3-3*xy^2 0 -0.003204 0.0004266 -0.002134 0 -f3yx^2-y^3 0 -0.003695 0.0004558 -0.002281 0 - sum over m -0.01804 0.002152 -0.01077 4.022e-06 - sum over m+zeta -0.01804 0.002152 -0.01077 4.022e-06 -Total Charge on atom: Fe 15.99 -Total Magnetism on atom: Fe (0.4405, -2.289, 0.1208) +s 0 1.275 0.05341 0.2605 -0.02903 + sum over m 1.275 0.05341 0.2605 -0.02903 +s 1 1.755 -0.01752 -0.08879 -0.005156 + sum over m 1.755 -0.01752 -0.08879 -0.005156 +s 2 -0.02898 -0.0404 -0.2039 -0.00813 + sum over m -0.02898 -0.0404 -0.2039 -0.00813 +s 3 -0.04711 0.006367 0.03139 -0.001874 + sum over m -0.04711 0.006367 0.03139 -0.001874 + sum over m+zeta 2.954 0.001862 -0.0008532 -0.04419 +pz 0 2.032 -0.001369 -0.006852 -1.367e-06 +px 0 2.025 -0.0009208 -0.004608 -1.387e-06 +py 0 2.032 -0.001332 -0.006666 -1.366e-06 + sum over m 6.089 -0.003622 -0.01813 -4.119e-06 +pz 1 -0.02528 0.0005889 0.002889 0 +px 1 -0.01606 0.0001369 0.0006408 0 +py 1 -0.02466 0.000571 0.002802 0 + sum over m -0.066 0.001297 0.006331 2.367e-06 + sum over m+zeta 6.023 -0.002325 -0.01179 -1.753e-06 +dz^2 0 1.957 0.001158 0.005774 -3.913e-06 +dxz 0 1.097 0.1724 0.7275 0.002311 +dyz 0 0.9509 0.1759 0.7475 0.002269 +dx^2-y^2 0 1.947 0.001654 0.008245 -4.075e-06 +dxy 0 1.113 0.1714 0.7227 0.002304 + sum over m 7.065 0.5225 2.212 0.006876 +dz^2 1 0.03925 -0.001062 -0.005333 -4.383e-06 +dxz 1 -0.0366 -0.003947 -0.01263 -0.0001213 +dyz 1 -0.03157 -0.005197 -0.01856 -0.0001267 +dx^2-y^2 1 0.04266 -0.001394 -0.007002 -4.206e-06 +dxy 1 -0.03743 -0.003854 -0.01222 -0.0001203 + sum over m -0.02369 -0.01545 -0.05575 -0.0003768 + sum over m+zeta 7.041 0.5071 2.156 0.006499 +fz^3 0 -0.006614 0.0007261 0.003596 0 +fxz^2 0 -0.001954 0.0002565 0.001276 0 +fyz^2 0 -0.002684 0.0002742 0.001366 0 +fzx^2-zy^2 0 9.09e-05 1.99e-05 8.018e-05 0 +fxyz 0 2.062e-05 4.102e-06 1.816e-05 0 +fx^3-3*xy^2 0 -0.003203 0.0004291 0.00213 0 +f3yx^2-y^3 0 -0.003698 0.0004635 0.002271 0 + sum over m -0.01804 0.002174 0.01074 0 + sum over m+zeta -0.01804 0.002174 0.01074 0 +Total Charge on atom: Fe 16 +Total Magnetism on atom: Fe (0.5088, 2.154, -0.03769) diff --git a/tests/integrate/204_NO_KP_NC_deltaspin/result.ref b/tests/integrate/204_NO_KP_NC_deltaspin/result.ref index 8a17a1fada..5db4c4ed06 100644 --- a/tests/integrate/204_NO_KP_NC_deltaspin/result.ref +++ b/tests/integrate/204_NO_KP_NC_deltaspin/result.ref @@ -1,4 +1,5 @@ -etotref -6844.326716364628 -etotperatomref -3422.1633581823 +etotref -6844.685232778927 +etotperatomref -3422.3426163895 Compare_mulliken_pass 0 -totaltimeref 36.59 +totaltimeref 28.5759 +14.63 From 92ef679df24c406d83f5130ea655560e058fb6b6 Mon Sep 17 00:00:00 2001 From: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com> Date: Fri, 26 Apr 2024 12:24:41 +0800 Subject: [PATCH 06/18] Fix: add a check_rho() to avoid negative charge density (#4053) * add a check_rho() to avoid negative rho_up/rho_dn * add mocks in UTs * change warningquit to warning about rho * Update source/module_elecstate/module_charge/charge.cpp Co-authored-by: kirk0830 <67682086+kirk0830@users.noreply.github.com> * Update source/module_elecstate/module_charge/charge.cpp Co-authored-by: kirk0830 <67682086+kirk0830@users.noreply.github.com> --------- Co-authored-by: kirk0830 <67682086+kirk0830@users.noreply.github.com> --- source/module_elecstate/elecstate.cpp | 1 + .../module_elecstate/module_charge/charge.cpp | 41 +++++++++++++++++-- .../module_elecstate/module_charge/charge.h | 6 ++- source/module_elecstate/test/charge_test.cpp | 6 +-- .../test/elecstate_base_test.cpp | 3 ++ .../test/elecstate_pw_test.cpp | 3 ++ 6 files changed, 51 insertions(+), 9 deletions(-) diff --git a/source/module_elecstate/elecstate.cpp b/source/module_elecstate/elecstate.cpp index 393c2d07d5..6a8af42fb7 100644 --- a/source/module_elecstate/elecstate.cpp +++ b/source/module_elecstate/elecstate.cpp @@ -218,6 +218,7 @@ void ElecState::init_scf(const int istep, const ModuleBase::ComplexMatrix& struc if (istep == 0) { this->charge->init_rho(this->eferm, strucfac, this->bigpw->nbz, this->bigpw->bz); + this->charge->check_rho(); // check the rho } // renormalize the charge density diff --git a/source/module_elecstate/module_charge/charge.cpp b/source/module_elecstate/module_charge/charge.cpp index b1c7dee91d..e659c46213 100644 --- a/source/module_elecstate/module_charge/charge.cpp +++ b/source/module_elecstate/module_charge/charge.cpp @@ -700,7 +700,7 @@ void Charge::save_rho_before_sum_band(void) return; } -double Charge::check_ne(const double* rho_in) const +double Charge::cal_rho2ne(const double* rho_in) const { double ne = 0.0; for (int ir = 0; ir < this->rhopw->nrxx; ir++) @@ -711,12 +711,45 @@ double Charge::check_ne(const double* rho_in) const Parallel_Reduce::reduce_pool(ne); #endif ne = ne * elecstate::get_ucell_omega() / (double)this->rhopw->nxyz; - std::cout << std::setprecision(10); - std::cout << " check the electrons number from rho, ne =" << ne << std::endl; - std::cout << std::setprecision(6); + return ne; } +void Charge::check_rho() +{ + if (this->nspin==1 || this->nspin==4) + { + double ne = 0.0; + ne = this->cal_rho2ne(rho[0]); + if (std::abs(ne - GlobalV::nelec) > 1.0e-6) + { + ModuleBase::WARNING("Charge", "Charge is not equal to the number of electrons!"); + } + } + else if (this->nspin == 2) + { + // for spin up + double ne_up = 0.0; + ne_up = this->cal_rho2ne(rho[0]); + if (ne_up < 0.0) + { + ModuleBase::WARNING_QUIT("Charge", "Number of spin-down electrons set in starting magnetization exceeds all available."); + } + // for spin down + double ne_dn = 0.0; + ne_dn = this->cal_rho2ne(rho[1]); + if (ne_dn < 0.0) + { + ModuleBase::WARNING_QUIT("Charge", "Number of spin-up electrons set in starting magnetization exceeds all available."); + } + // for total charge + if (std::abs(ne_up + ne_dn - GlobalV::nelec) > 1.0e-6) + { + ModuleBase::WARNING("Charge", "Charge is not equal to the number of electrons!"); + } + } +} + // LiuXh add 20180619 void Charge::init_final_scf() { diff --git a/source/module_elecstate/module_charge/charge.h b/source/module_elecstate/module_charge/charge.h index 58bc2ef615..71fdc25120 100644 --- a/source/module_elecstate/module_charge/charge.h +++ b/source/module_elecstate/module_charge/charge.h @@ -98,9 +98,11 @@ class Charge double *rhocg ) const; - double check_ne(const double *rho_in) const; + double cal_rho2ne(const double *rho_in) const; - void init_final_scf(); //LiuXh add 20180619 + void check_rho(); // to check whether the charge density is normal + + void init_final_scf(); //LiuXh add 20180619 public: /** diff --git a/source/module_elecstate/test/charge_test.cpp b/source/module_elecstate/test/charge_test.cpp index 4c50265284..37cdd0ab05 100644 --- a/source/module_elecstate/test/charge_test.cpp +++ b/source/module_elecstate/test/charge_test.cpp @@ -64,7 +64,7 @@ void Set_GlobalV_Default() * - calculate \sum_{is}^nspin \sum_{ir}^nrxx rho[is][ir] * - RenormalizeRho: Charge::renormalize_rho() * - renormalize rho so as to ensure the sum of rho equals to total number of electrons - * - CheckNe: Charge::check_ne() + * - CheckNe: Charge::cal_rho2ne() * - check the total number of electrons summed from rho[is] * - SaveRhoBeforeSumBand: Charge::save_rho_before_sum_band() * - meaning as the function name @@ -185,7 +185,7 @@ TEST_F(ChargeTest, CheckNe) elecstate::tmp_ucell_omega = ucell->omega; charge->renormalize_rho(); EXPECT_NEAR(charge->sum_rho(), 8.0, 1e-10); - EXPECT_NEAR(charge->check_ne(charge->rho[0]), 8.0, 1e-10); + EXPECT_NEAR(charge->cal_rho2ne(charge->rho[0]), 8.0, 1e-10); } TEST_F(ChargeTest, SaveRhoBeforeSumBand) @@ -207,7 +207,7 @@ TEST_F(ChargeTest, SaveRhoBeforeSumBand) elecstate::tmp_ucell_omega = ucell->omega; charge->renormalize_rho(); charge->save_rho_before_sum_band(); - EXPECT_NEAR(charge->check_ne(charge->rho_save[0]), 8.0, 1e-10); + EXPECT_NEAR(charge->cal_rho2ne(charge->rho_save[0]), 8.0, 1e-10); } TEST_F(ChargeTest, InitFinalScf) diff --git a/source/module_elecstate/test/elecstate_base_test.cpp b/source/module_elecstate/test/elecstate_base_test.cpp index 2f5ee12e6d..33b831e6a9 100644 --- a/source/module_elecstate/test/elecstate_base_test.cpp +++ b/source/module_elecstate/test/elecstate_base_test.cpp @@ -74,6 +74,9 @@ void Charge::set_rhopw(ModulePW::PW_Basis*) void Charge::renormalize_rho() { } +void Charge::check_rho() +{ +} /************************************************ * unit test of elecstate.cpp diff --git a/source/module_elecstate/test/elecstate_pw_test.cpp b/source/module_elecstate/test/elecstate_pw_test.cpp index ea859072ad..b82dd3cef8 100644 --- a/source/module_elecstate/test/elecstate_pw_test.cpp +++ b/source/module_elecstate/test/elecstate_pw_test.cpp @@ -144,6 +144,9 @@ void Charge::set_rhopw(ModulePW::PW_Basis*) void Charge::renormalize_rho() { } +void Charge::check_rho() +{ +} void Set_GlobalV_Default() { From 7f84a0996a90e439c95a34e560e5fe33b452ac6c Mon Sep 17 00:00:00 2001 From: Peng Xingliang <91927439+pxlxingliang@users.noreply.github.com> Date: Fri, 26 Apr 2024 19:07:47 +0800 Subject: [PATCH 07/18] update version (#4061) --- source/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/version.h b/source/version.h index db044fd958..3be59f6341 100644 --- a/source/version.h +++ b/source/version.h @@ -1,3 +1,3 @@ #ifndef VERSION -#define VERSION "v3.6.1" +#define VERSION "v3.6.2" #endif From f372175d267b92e99e20c668e50111c5acae4860 Mon Sep 17 00:00:00 2001 From: GRYS <57103981+grysgreat@users.noreply.github.com> Date: Sat, 27 Apr 2024 21:07:24 +0800 Subject: [PATCH 08/18] doc: add docs about using MPI in docker. (#4060) * add docs. * Update docs/community/faq.md Co-authored-by: Chun Cai * Update docs/community/faq.md Co-authored-by: Chun Cai --------- Co-authored-by: Chun Cai --- docs/community/faq.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/community/faq.md b/docs/community/faq.md index e2a1cdee02..724519a35e 100644 --- a/docs/community/faq.md +++ b/docs/community/faq.md @@ -80,6 +80,8 @@ If the program prompt something like "KILLED BY SIGNAL: 9 (Killed)", it may be c If the error message is "Segmentation fault", or there is no enough information on the error, please feel free to submit an issue. +**4. Error "Read -1" when using mpirun in docker environment** +This is a [known issue](https://github.com/open-mpi/ompi/issues/4948) of OpenMPI running in a docker container. In this case, please set environment variable `OMPI_MCA_btl_vader_single_copy_mechanism=none`. ## Miscellaneous **1. How to visualize charge density file?** From b74f2a0b67fb190bc8d35a5d88f3ece6cdec94f0 Mon Sep 17 00:00:00 2001 From: Wenfei Li <38569667+wenfei-li@users.noreply.github.com> Date: Sun, 28 Apr 2024 10:39:53 +0800 Subject: [PATCH 09/18] Feature : descriptors for equivariant DeePKS (#3894) * Feature : add interfaces for calculating equivariant pdm * add priting of equiv descriptors * modify calculation of new pdm * modify calculation of new pdm (multi-k) --------- Co-authored-by: wenfei-li --- .../module_esolver/esolver_ks_lcao_elec.cpp | 2 +- .../esolver_ks_lcao_tmpfunc.cpp | 8 +- .../hamilt_lcaodft/FORCE_gamma.cpp | 2 +- .../hamilt_lcaodft/FORCE_k.cpp | 4 +- .../operator_lcao/deepks_lcao.cpp | 14 +- .../module_deepks/LCAO_deepks.cpp | 47 ++-- .../module_deepks/LCAO_deepks.h | 23 +- .../module_deepks/LCAO_deepks_interface.cpp | 6 +- .../module_deepks/LCAO_deepks_io.cpp | 36 ++- .../module_deepks/LCAO_deepks_pdm.cpp | 211 +++++++++++++----- .../module_deepks/LCAO_deepks_torch.cpp | 74 ++++-- .../module_deepks/test/LCAO_deepks_test.cpp | 6 +- 12 files changed, 313 insertions(+), 120 deletions(-) diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp index 5eeaaa2e63..35ee6df306 100644 --- a/source/module_esolver/esolver_ks_lcao_elec.cpp +++ b/source/module_esolver/esolver_ks_lcao_elec.cpp @@ -689,7 +689,7 @@ void ESolver_KS_LCAO::nscf(void) const elecstate::DensityMatrix* dm = dynamic_cast*>(this->pelec)->get_DM(); this->dpks_cal_projected_DM(dm); - GlobalC::ld.cal_descriptor(); // final descriptor + GlobalC::ld.cal_descriptor(GlobalC::ucell.nat); // final descriptor GlobalC::ld.cal_gedm(GlobalC::ucell.nat); } #endif diff --git a/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp b/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp index a8ffb911f7..95fd4312a0 100644 --- a/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp +++ b/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp @@ -93,9 +93,7 @@ namespace ModuleESolver GlobalC::ld.cal_projected_DM_k(dm, GlobalC::ucell, GlobalC::ORB, - GlobalC::GridD, - this->kv.nks, - this->kv.kvec_d); + GlobalC::GridD); } @@ -106,9 +104,7 @@ namespace ModuleESolver GlobalC::ld.cal_projected_DM_k(dm, GlobalC::ucell, GlobalC::ORB, - GlobalC::GridD, - this->kv.nks, - this->kv.kvec_d); + GlobalC::GridD); } #endif } diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp index a6670fdc9a..0751a9e46f 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp @@ -73,7 +73,7 @@ void Force_LCAO_gamma::ftable_gamma(const bool isforce, { const std::vector>& dm_gamma = DM->get_DMK_vector(); GlobalC::ld.cal_projected_DM(DM, GlobalC::ucell, GlobalC::ORB, GlobalC::GridD); - GlobalC::ld.cal_descriptor(); + GlobalC::ld.cal_descriptor(GlobalC::ucell.nat); GlobalC::ld.cal_gedm(GlobalC::ucell.nat); GlobalC::ld.cal_f_delta_gamma( dm_gamma, diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp index 2dea6d49f3..736813ccbf 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp @@ -124,8 +124,8 @@ void Force_LCAO_k::ftable_k(const bool isforce, if (GlobalV::deepks_scf) { const std::vector>>& dm_k = DM->get_DMK_vector(); - GlobalC::ld.cal_projected_DM_k(DM, GlobalC::ucell, GlobalC::ORB, GlobalC::GridD, kv.nks, kv.kvec_d); - GlobalC::ld.cal_descriptor(); + GlobalC::ld.cal_projected_DM_k(DM, GlobalC::ucell, GlobalC::ORB, GlobalC::GridD); + GlobalC::ld.cal_descriptor(GlobalC::ucell.nat); GlobalC::ld.cal_gedm(GlobalC::ucell.nat); GlobalC::ld.cal_f_delta_k(dm_k, diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp index c5e3174539..b61a4b248c 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp @@ -156,7 +156,7 @@ void DeePKS>::contributeHR() *this->ucell, GlobalC::ORB, GlobalC::GridD); - GlobalC::ld.cal_descriptor(); + GlobalC::ld.cal_descriptor(this->ucell->nat); GlobalC::ld.cal_gedm(this->ucell->nat); //GlobalC::ld.add_v_delta(*this->ucell, // GlobalC::ORB, @@ -189,10 +189,8 @@ void DeePKS, double>>::contributeHR() GlobalC::ld.cal_projected_DM_k(this->DM, *this->ucell, GlobalC::ORB, - GlobalC::GridD, - this->nks, - this->kvec_d); - GlobalC::ld.cal_descriptor(); + GlobalC::GridD); + GlobalC::ld.cal_descriptor(this->ucell->nat); // calculate dE/dD GlobalC::ld.cal_gedm(this->ucell->nat); @@ -230,10 +228,8 @@ void DeePKS, std::complex>>::contribut GlobalC::ld.cal_projected_DM_k(this->DM, *this->ucell, GlobalC::ORB, - GlobalC::GridD, - this->nks, - this->kvec_d); - GlobalC::ld.cal_descriptor(); + GlobalC::GridD); + GlobalC::ld.cal_descriptor(this->ucell->nat); // calculate dE/dD GlobalC::ld.cal_gedm(this->ucell->nat); diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp index 18192ab8a8..a29c0754fa 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.cpp @@ -87,17 +87,36 @@ void LCAO_Deepks::init( assert(nm >= 0); assert(tot_inl_per_atom >= 0); - const int tot_inl = tot_inl_per_atom * nat; + int tot_inl = tot_inl_per_atom * nat; + + if(if_equiv) tot_inl = nat; this->lmaxd = lm; this->nmaxd = nm; - this->inlmax = tot_inl; + GlobalV::ofs_running << " lmax of descriptor = " << this->lmaxd << std::endl; GlobalV::ofs_running << " nmax of descriptor= " << nmaxd << std::endl; - GlobalV::ofs_running << " total basis (all atoms) for descriptor= " << std::endl; - - //init pdm** - const int pdm_size = (this->lmaxd * 2 + 1) * (this->lmaxd * 2 + 1); + + int pdm_size = 0; + this->inlmax = tot_inl; + if(!if_equiv) + { + GlobalV::ofs_running << " total basis (all atoms) for descriptor= " << std::endl; + + //init pdm** + pdm_size = (this->lmaxd * 2 + 1) * (this->lmaxd * 2 + 1); + } + else + { + for(int il = 0; il < this->lmaxd + 1; il++) + { + pdm_size += (2 * il + 1) * orb.Alpha[0].getNchi(il); + } + pdm_size = pdm_size * pdm_size; + this->des_per_atom=pdm_size; + GlobalV::ofs_running << " Equivariant version, size of pdm matrices : " << pdm_size << std::endl; + } + this->pdm = new double* [this->inlmax]; for (int inl = 0;inl < this->inlmax;inl++) { @@ -106,15 +125,17 @@ void LCAO_Deepks::init( } // cal n(descriptor) per atom , related to Lmax, nchi(L) and m. (not total_nchi!) - this->des_per_atom=0; // mohan add 2021-04-21 - for (int l = 0; l <= this->lmaxd; l++) + if(!if_equiv) { - this->des_per_atom += orb.Alpha[0].getNchi(l) * (2 * l + 1); - } - - this->n_descriptor = nat * this->des_per_atom; + this->des_per_atom=0; // mohan add 2021-04-21 + for (int l = 0; l <= this->lmaxd; l++) + { + this->des_per_atom += orb.Alpha[0].getNchi(l) * (2 * l + 1); + } + this->n_descriptor = nat * this->des_per_atom; - this->init_index(ntype, nat, na, tot_inl, orb); + this->init_index(ntype, nat, na, tot_inl, orb); + } this->allocate_nlm(nat); this->pv = &pv_in; diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h index dbf6ee26ad..1ec77ed412 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks.h @@ -93,6 +93,8 @@ class LCAO_Deepks int nks_V_delta = 0; bool init_pdm = false; //for DeePKS NSCF calculation + + bool if_equiv = false; //equivariant version // deep neural network module that provides corrected Hamiltonian term and // related derivatives. @@ -106,7 +108,7 @@ class LCAO_Deepks std::vector>>>> nlm_save_k; // projected density matrix - double** pdm; //[tot_Inl][2l+1][2l+1] caoyu modified 2021-05-07 + double** pdm; //[tot_Inl][2l+1][2l+1] caoyu modified 2021-05-07; if equivariant version: [nat][nlm*nlm] std::vector pdm_tensor; // descriptors @@ -287,11 +289,20 @@ class LCAO_Deepks const elecstate::DensityMatrix, double>* dm, const UnitCell &ucell, const LCAO_Orbitals &orb, - Grid_Driver& GridD, - const int nks, - const std::vector> &kvec_d); + Grid_Driver& GridD); void check_projected_dm(void); + void cal_projected_DM_equiv( + const elecstate::DensityMatrix* dm, + const UnitCell &ucell, + const LCAO_Orbitals &orb, + Grid_Driver& GridD); + void cal_projected_DM_k_equiv( + const elecstate::DensityMatrix, double>* dm, + const UnitCell &ucell, + const LCAO_Orbitals &orb, + Grid_Driver& GridD); + //calculate the gradient of pdm with regard to atomic positions //d/dX D_{Inl,mm'} void cal_gdmx(//const ModuleBase::matrix& dm, @@ -439,10 +450,12 @@ class LCAO_Deepks ///Calculates descriptors ///which are eigenvalues of pdm in blocks of I_n_l - void cal_descriptor(void); + void cal_descriptor(const int nat); ///print descriptors based on LCAO basis void check_descriptor(const UnitCell &ucell); + void cal_descriptor_equiv(const int nat); + ///calculates gradient of descriptors w.r.t atomic positions ///---------------------------------------------------- ///m, n: 2*l+1 diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp index a6b14dcb58..6c0f72e242 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_interface.cpp @@ -88,7 +88,7 @@ void LCAO_Deepks_Interface::out_deepks_labels(double etot, // this part is for integrated test of deepks ld->cal_projected_DM(dm, ucell, orb, GridD); ld->check_projected_dm(); // print out the projected dm for NSCF calculaiton - ld->cal_descriptor(); // final descriptor + ld->cal_descriptor(nat); // final descriptor ld->check_descriptor(ucell); if (GlobalV::deepks_out_labels) @@ -188,9 +188,9 @@ void LCAO_Deepks_Interface::out_deepks_labels(double etot, { // this part is for integrated test of deepks // so it is printed no matter even if deepks_out_labels is not used - ld->cal_projected_DM_k(dm, ucell, orb, GridD, nks, kvec_d); + ld->cal_projected_DM_k(dm, ucell, orb, GridD); ld->check_projected_dm(); // print out the projected dm for NSCF calculaiton - ld->cal_descriptor(); // final descriptor + ld->cal_descriptor(nat); // final descriptor ld->check_descriptor(ucell); if (GlobalV::deepks_out_labels) diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.cpp index 3d50ad646c..646e5c0a63 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_io.cpp @@ -65,19 +65,39 @@ void LCAO_Deepks::save_npy_d(const int nat) ModuleBase::TITLE("LCAO_Deepks", "save_npy_d"); if(GlobalV::MY_RANK!=0) return; //save descriptor in .npy format - vector npy_des; - for (int inl = 0;inl < inlmax;++inl) + if(!if_equiv) { - int nm = 2*inl_l[inl] + 1; - for(int im=0;im npy_des; + for (int inl = 0;inl < inlmax;++inl) { - npy_des.push_back(this->d_tensor[inl].index({im}).item().toDouble()); + int nm = 2*inl_l[inl] + 1; + for(int im=0;imd_tensor[inl].index({im}).item().toDouble()); + } + } + const long unsigned dshape[] = {static_cast(nat), static_cast(this->des_per_atom)}; + if (GlobalV::MY_RANK == 0) + { + npy::SaveArrayAsNumpy("dm_eig.npy", false, 2, dshape, npy_des); } } - const long unsigned dshape[] = {static_cast(nat), static_cast(this->des_per_atom)}; - if (GlobalV::MY_RANK == 0) + else { - npy::SaveArrayAsNumpy("dm_eig.npy", false, 2, dshape, npy_des); + // a rather unnecessary way of writing this, but I'll do it for now + std::vector npy_des; + for(int iat = 0; iat < nat; iat ++) + { + for(int i = 0; i < this->des_per_atom; i++) + { + npy_des.push_back(this->d_tensor[iat].index({i}).item().toDouble()); + } + } + const long unsigned dshape[] = {static_cast(nat), static_cast(this->des_per_atom)}; + if (GlobalV::MY_RANK == 0) + { + npy::SaveArrayAsNumpy("dm_eig.npy", false, 2, dshape, npy_des); + } } return; } diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp index bf21360ebe..88a56582e6 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_pdm.cpp @@ -34,7 +34,20 @@ void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrixlmaxd * 2 + 1) * (this->lmaxd * 2 + 1); + int pdm_size; + if(!if_equiv) + { + pdm_size = (this->lmaxd * 2 + 1) * (this->lmaxd * 2 + 1); + } + else + { + int nproj = 0; + for(int il = 0; il < this->lmaxd + 1; il++) + { + nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il); + } + pdm_size = nproj * nproj; + } if (GlobalV::init_chg == "file" && !this->init_pdm) //for DeePKS NSCF calculation { std::ifstream ifs("pdm.dat"); @@ -80,23 +93,42 @@ void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix trace_alpha_row; std::vector trace_alpha_col; - int ib=0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) + if(!if_equiv) { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) + int ib=0; + for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) { - const int inl = this->inl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - - for (int m1=0; m1inl_index[T0](I0, L0, N0); + const int nm = 2*L0+1; + + for (int m1=0; m1lmaxd + 1; il++) + { + nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il); + } + for(int iproj = 0; iproj < nproj; iproj ++) + { + for(int jproj = 0; jproj < nproj; jproj ++) + { + trace_alpha_row.push_back(iproj); + trace_alpha_col.push_back(jproj); } - ib+=nm; } } const int trace_alpha_size = trace_alpha_row.size(); @@ -199,25 +231,46 @@ void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrixinl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - - for (int m1=0; m1inl_index[T0](I0, L0, N0); + const int nm = 2*L0+1; + + for (int m1=0; m1lmaxd + 1; il++) + { + nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il); + } + for(int iproj = 0; iproj < nproj; iproj ++) + { + for(int jproj = 0; jproj < nproj; jproj ++) + { + pdm[iat][iproj * nproj + jproj] += + ddot_(&row_size, g_1dmt.data()+index*row_size, &inc, s_1t.data()+index*row_size, &inc); + index ++; } - ib+=nm; } } }//ad1 @@ -234,11 +287,23 @@ void LCAO_Deepks::cal_projected_DM(const elecstate::DensityMatrix, double>* dm, const UnitCell &ucell, const LCAO_Orbitals &orb, - Grid_Driver& GridD, - const int nks, - const std::vector> &kvec_d) + Grid_Driver& GridD) { - const int pdm_size = (this->lmaxd * 2 + 1) * (this->lmaxd * 2 + 1); + + int pdm_size; + if(!if_equiv) + { + pdm_size = (this->lmaxd * 2 + 1) * (this->lmaxd * 2 + 1); + } + else + { + int nproj = 0; + for(int il = 0; il < this->lmaxd + 1; il++) + { + nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il); + } + pdm_size = nproj * nproj; + } if (GlobalV::init_chg == "file" && !this->init_pdm) //for DeePKS NSCF calculation { @@ -285,25 +350,44 @@ void LCAO_Deepks::cal_projected_DM_k(const elecstate::DensityMatrix trace_alpha_row; std::vector trace_alpha_col; - int ib=0; - for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) + if(!if_equiv) { - for (int N0 = 0;N0 < orb.Alpha[0].getNchi(L0);++N0) + int ib=0; + for (int L0 = 0; L0 <= orb.Alpha[0].getLmax();++L0) { - const int inl = this->inl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - - for (int m1=0; m1inl_index[T0](I0, L0, N0); + const int nm = 2*L0+1; + + for (int m1=0; m1lmaxd + 1; il++) + { + nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il); + } + for(int iproj = 0; iproj < nproj; iproj ++) + { + for(int jproj = 0; jproj < nproj; jproj ++) + { + trace_alpha_row.push_back(iproj); + trace_alpha_col.push_back(jproj); + } + } + } const int trace_alpha_size = trace_alpha_row.size(); for (int ad1=0; ad1inl_index[T0](I0, L0, N0); - const int nm = 2*L0+1; - - for (int m1=0; m1inl_index[T0](I0, L0, N0); + const int nm = 2*L0+1; + + for (int m1=0; m1lmaxd + 1; il++) + { + nproj += (2 * il + 1) * orb.Alpha[0].getNchi(il); + } + for(int iproj = 0; iproj < nproj; iproj ++) + { + for(int jproj = 0; jproj < nproj; jproj ++) + { + pdm[iat][iproj * nproj + jproj] += + ddot_(&row_size, g_1dmt.data()+index*row_size, &inc, s_1t.data()+index*row_size, &inc); + index ++; + } + } } }//ad1 }//I0 diff --git a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp index 57c7974e12..c7d81ac2d3 100644 --- a/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp +++ b/source/module_hamilt_lcao/module_deepks/LCAO_deepks_torch.cpp @@ -30,12 +30,39 @@ #include "module_base/libm/libm.h" #include "module_base/blas_connector.h" +void LCAO_Deepks::cal_descriptor_equiv(const int nat) +{ + ModuleBase::TITLE("LCAO_Deepks", "cal_descriptor_equiv"); + ModuleBase::timer::tick("LCAO_Deepks", "cal_descriptor_equiv"); + + // a rather unnecessary way of writing this, but I'll do it for now + if (!this->d_tensor.empty()) + { + this->d_tensor.erase(this->d_tensor.begin(), this->d_tensor.end()); + } + + for(int iat = 0; iat < nat; iat++) + { + auto tmp = torch::zeros(des_per_atom,torch::kFloat64); + std::memcpy(tmp.data_ptr(),pdm[iat],sizeof(double)*tmp.numel()); + this->d_tensor.push_back(tmp); + } + + ModuleBase::timer::tick("LCAO_Deepks", "cal_descriptor_equiv"); +} + //calculates descriptors from projected density matrices -void LCAO_Deepks::cal_descriptor(void) +void LCAO_Deepks::cal_descriptor(const int nat) { ModuleBase::TITLE("LCAO_Deepks", "cal_descriptor"); ModuleBase::timer::tick("LCAO_Deepks", "cal_descriptor"); + if(if_equiv) + { + this->cal_descriptor_equiv(nat); + return; + } + //init pdm_tensor and d_tensor torch::Tensor tmp; @@ -85,24 +112,41 @@ void LCAO_Deepks::check_descriptor(const UnitCell &ucell) if(GlobalV::MY_RANK!=0) return; std::ofstream ofs("descriptor.dat"); ofs<des_per_atom << std::endl; - int id = 0; - for(int inl=0;inldes_per_atom << std::endl; + int id = 0; + for(int inl=0;inldes_per_atom << std::endl; + for(int i = 0; i < this->des_per_atom; i ++) + { + ofs << this->pdm[iat][i] << " "; + if (i % 8 == 7) ofs << std::endl; + } ofs << std::endl << std::endl; } } diff --git a/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp b/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp index adb73fc568..f42ce77fc5 100644 --- a/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp +++ b/source/module_hamilt_lcao/module_deepks/test/LCAO_deepks_test.cpp @@ -129,9 +129,7 @@ void test_deepks::check_pdm(void) this->ld.cal_projected_DM_k(dm_k_new, ucell, ORB, - Test_Deepks::GridD, - kv.nkstot, - kv.kvec_d); + Test_Deepks::GridD); } this->ld.check_projected_dm(); this->compare_with_ref("pdm.dat","pdm_ref.dat"); @@ -187,7 +185,7 @@ void test_deepks::check_gdmx(void) void test_deepks::check_descriptor(void) { - this->ld.cal_descriptor(); + this->ld.cal_descriptor(ucell.nat); this->ld.check_descriptor(ucell); this->compare_with_ref("descriptor.dat","descriptor_ref.dat"); } From c36e5170aa38537ea4b3d680f643502f0441df15 Mon Sep 17 00:00:00 2001 From: Wenfei Li <38569667+wenfei-li@users.noreply.github.com> Date: Mon, 29 Apr 2024 14:14:01 +0800 Subject: [PATCH 10/18] Feature : reading and writing of h(r) and dm(r) in npz format (#4066) * Feature : reading and writing of h(r) and dm(r) in npz format * fix bug * udpate makefile * fix makefile and cmake --------- Co-authored-by: wenfei-li --- CMakeLists.txt | 22 + docs/advanced/input_files/input-main.md | 14 + source/Makefile.Objects | 1 + source/module_base/global_variable.cpp | 3 + source/module_base/global_variable.h | 3 + source/module_elecstate/elecstate_lcao.cpp | 7 + source/module_esolver/CMakeLists.txt | 1 + source/module_esolver/esolver_ks.cpp | 16 + source/module_esolver/esolver_ks_lcao.cpp | 32 ++ source/module_esolver/esolver_ks_lcao.h | 3 + .../module_esolver/esolver_ks_lcao_elec.cpp | 23 + source/module_esolver/io_npz.cpp | 499 ++++++++++++++++++ source/module_io/input.cpp | 32 +- source/module_io/input.h | 3 + source/module_io/input_conv.cpp | 3 + source/module_io/parameter_pool.cpp | 12 + source/module_io/test/input_conv_test.cpp | 3 + source/module_io/test/write_input_test.cpp | 3 + source/module_io/write_input.cpp | 3 + 19 files changed, 681 insertions(+), 2 deletions(-) create mode 100644 source/module_esolver/io_npz.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 44fcd8f094..6a820b8b63 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,6 +37,7 @@ option(COMMIT_INFO "Print commit information in log" ON) option(ENABLE_FFT_TWO_CENTER "Enable FFT-based two-center integral method." ON) option(ENABLE_GOOGLEBENCH "Enable GOOGLE-benchmark usage." OFF) option(ENABLE_RAPIDJSON "Enable rapid-json usage." OFF) +option(ENABLE_CNPY "Enable cnpy usage." OFF) # enable json support if(ENABLE_RAPIDJSON) @@ -467,6 +468,27 @@ if(ENABLE_DEEPKS) add_compile_definitions(__DEEPKS) endif() +if (ENABLE_CNPY) + find_path(cnpy_SOURCE_DIR + cnpy.h + HINTS ${libnpy_INCLUDE_DIR} + ) + if(NOT cnpy_SOURCE_DIR) + include(FetchContent) + FetchContent_Declare( + cnpy + GIT_REPOSITORY https://github.com/rogersce/cnpy.git + GIT_PROGRESS TRUE + ) + FetchContent_MakeAvailable(cnpy) + else() + include_directories(${cnpy_INCLUDE_DIR}) + endif() + include_directories(${cnpy_SOURCE_DIR}) + target_link_libraries(${ABACUS_BIN_NAME} cnpy) + add_compile_definitions(__USECNPY) +endif() + function(git_submodule_update) if(GIT_SUBMOD_RESULT EQUAL "0") message(DEBUG "Submodule init'ed") diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index 16bebcab9d..f1345f04c4 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -1607,6 +1607,20 @@ These variables are used to control the output of properties. - **Description**: Whether to print the upper triangular part of the exchange-correlation matrices in **Kohn-Sham orbital representation** (unit: Ry): $\braket{\psi_i|V_\text{xc}^\text{(semi-)local}+V_\text{exx}+V_\text{DFTU}|\psi_j}$ for each k point into files in the directory `OUT.${suffix}`, which is useful for the subsequent GW calculation. (Note that currently DeePKS term is not included. ) The files are named `k-$k-Vxc`, the meaning of `$k`corresponding to k point and spin is same as [hs_matrix.md](../elec_properties/hs_matrix.md#out_mat_hs). - **Default**: False +### out_hr_npz/out_dm_npz + +- **Type**: Boolean +- **Availability**: Numerical atomic orbital basis +- **Description**: Whether to print Hamiltonian matrices H(R)/density matrics DM(R) in npz format. This feature does not work for gamma-only calculations. Currently only intended for internal usage. +- **Default**: False + +### dm_to_rho + +- **Type**: Boolean +- **Availability**: Numerical atomic orbital basis +- **Description**: Reads density matrix DM(R) in npz format and creates electron density on grids. This feature does not work for gamma-only calculations. Only supports serial calculations. Currently only intended for internal usage. +- **Default**: False + ### out_app_flag - **Type**: Boolean diff --git a/source/Makefile.Objects b/source/Makefile.Objects index 5416eb69e7..f61b7cced1 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -222,6 +222,7 @@ OBJS_ESOLVER_LCAO=esolver_ks_lcao.o\ esolver_ks_lcao_elec.o\ esolver_ks_lcao_tddft.o\ esolver_ks_lcao_tmpfunc.o\ + io_npz.o\ OBJS_GINT=gint.o\ gint_gamma.o\ diff --git a/source/module_base/global_variable.cpp b/source/module_base/global_variable.cpp index b9ceb358da..84ae03d359 100644 --- a/source/module_base/global_variable.cpp +++ b/source/module_base/global_variable.cpp @@ -283,6 +283,9 @@ bool out_bandgap = false; // QO added for bandgap printing int out_interval = 1; // convert from out_hsR_interval liuyu 2023-04-18 bool out_mat_xc = false; // output Vxc in KS-wfc representation for GW calculation +bool out_hr_npz = false; +bool out_dm_npz = false; +bool dm_to_rho = false; // reads dm in npz format, then prints density in cube format //========================================================== // Deltaspin related diff --git a/source/module_base/global_variable.h b/source/module_base/global_variable.h index a504307ce2..bf181c84d9 100644 --- a/source/module_base/global_variable.h +++ b/source/module_base/global_variable.h @@ -313,6 +313,9 @@ extern bool out_bandgap; extern int out_interval; extern bool out_mat_xc; // output Vxc in KS-wfc representation for GW calculation +extern bool out_hr_npz; //writes h(r) in npz format +extern bool out_dm_npz; //writes dm(r) in npz format +extern bool dm_to_rho; //reads in dm(r) and creates density // Deltaspin related extern bool sc_mag_switch; // 0: no deltaspin; 1: constrain atomic magnetic moments; diff --git a/source/module_elecstate/elecstate_lcao.cpp b/source/module_elecstate/elecstate_lcao.cpp index a70153fe68..4fdbe229fb 100644 --- a/source/module_elecstate/elecstate_lcao.cpp +++ b/source/module_elecstate/elecstate_lcao.cpp @@ -93,6 +93,12 @@ void ElecStateLCAO>::psiToRho(const psi::Psicalculate_weights(); + +// the calculations of dm, and dm -> rho are, technically, two separate functionalities, as we cannot +// rule out the possibility that we may have a dm from other sources, such as read from file. +// However, since we are not separating them now, I opt to add a flag to control how dm is obtained as of now +if(!GlobalV::dm_to_rho) +{ this->calEBand(); ModuleBase::GlobalFunc::NOTE("Calculate the density matrix."); @@ -130,6 +136,7 @@ void ElecStateLCAO>::psiToRho(const psi::Psiprint_psi(psi); } } +} // old 2D-to-Grid conversion has been replaced by new Gint Refactor 2023/09/25 //this->loc->cal_dk_k(*this->lowf->gridt, this->wg, (*this->klist)); for (int is = 0; is < GlobalV::NSPIN; is++) diff --git a/source/module_esolver/CMakeLists.txt b/source/module_esolver/CMakeLists.txt index 978160295e..44fe0eabe1 100644 --- a/source/module_esolver/CMakeLists.txt +++ b/source/module_esolver/CMakeLists.txt @@ -18,6 +18,7 @@ if(ENABLE_LCAO) esolver_ks_lcao_elec.cpp esolver_ks_lcao_tddft.cpp esolver_ks_lcao_tmpfunc.cpp + io_npz.cpp ) endif() diff --git a/source/module_esolver/esolver_ks.cpp b/source/module_esolver/esolver_ks.cpp index 59db2849fd..e69e61c296 100644 --- a/source/module_esolver/esolver_ks.cpp +++ b/source/module_esolver/esolver_ks.cpp @@ -390,6 +390,7 @@ void ESolver_KS::run(const int istep, UnitCell& ucell) ModuleBase::timer::tick(this->classname, "run"); this->before_scf(istep); //Something else to do before the iter loop + if(GlobalV::dm_to_rho) return; //nothing further is needed ModuleBase::GlobalFunc::DONE(GlobalV::ofs_running, "INIT SCF"); @@ -631,6 +632,21 @@ ModuleIO::Output_Rho ESolver_KS::create_Output_Rho( { const int precision = 3; std::string tag = "CHG"; + if(GlobalV::dm_to_rho) + { + return ModuleIO::Output_Rho(this->pw_big, + this->pw_rhod, + is, + GlobalV::NSPIN, + pelec->charge->rho[is], + iter, + this->pelec->eferm.get_efval(is), + &(GlobalC::ucell), + GlobalV::global_out_dir, + precision, + tag, + prefix); + } return ModuleIO::Output_Rho(this->pw_big, this->pw_rhod, is, diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp index 949e6c4ba7..80c2d94c79 100644 --- a/source/module_esolver/esolver_ks_lcao.cpp +++ b/source/module_esolver/esolver_ks_lcao.cpp @@ -1116,6 +1116,38 @@ void ESolver_KS_LCAO::after_scf(const int istep) } #endif + if(GlobalV::out_hr_npz) + { + this->p_hamilt->updateHk(0); // first k point, up spin + hamilt::HamiltLCAO, double>* p_ham_lcao + = dynamic_cast, double>*>(this->p_hamilt); + std::string zipname = "output_HR0.npz"; + this->output_mat_npz(zipname,*(p_ham_lcao->getHR())); + + if(GlobalV::NSPIN==2) + { + this->p_hamilt->updateHk(this->kv.nks/2); // the other half of k points, down spin + hamilt::HamiltLCAO, double>* p_ham_lcao + = dynamic_cast, double>*>(this->p_hamilt); + zipname = "output_HR1.npz"; + this->output_mat_npz(zipname,*(p_ham_lcao->getHR())); + } + } + + if(GlobalV::out_dm_npz) + { + const elecstate::DensityMatrix* dm + = dynamic_cast*>(this->pelec)->get_DM(); + std::string zipname = "output_DM0.npz"; + this->output_mat_npz(zipname,*(dm->get_DMR_pointer(1))); + + if(GlobalV::NSPIN==2) + { + zipname = "output_DM1.npz"; + this->output_mat_npz(zipname,*(dm->get_DMR_pointer(2))); + } + } + if (!md_skip_out(GlobalV::CALCULATION, istep, GlobalV::out_interval)) { this->create_Output_Mat_Sparse(istep).write(); diff --git a/source/module_esolver/esolver_ks_lcao.h b/source/module_esolver/esolver_ks_lcao.h index f579579cea..9886081e0f 100644 --- a/source/module_esolver/esolver_ks_lcao.h +++ b/source/module_esolver/esolver_ks_lcao.h @@ -115,6 +115,9 @@ namespace ModuleESolver /// @brief create ModuleIO::Output_Mat_Sparse object to output sparse density matrix of H, S, T, r ModuleIO::Output_Mat_Sparse create_Output_Mat_Sparse(int istep); + void read_mat_npz(std::string& zipname, hamilt::HContainer& hR); + void output_mat_npz(std::string& zipname, const hamilt::HContainer& hR); + /// @brief check if skip the corresponding output in md calculation bool md_skip_out(std::string calculation, int istep, int interval); diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp index 35ee6df306..3646858a49 100644 --- a/source/module_esolver/esolver_ks_lcao_elec.cpp +++ b/source/module_esolver/esolver_ks_lcao_elec.cpp @@ -329,6 +329,29 @@ void ESolver_KS_LCAO::before_scf(int istep) ->get_DM() ->init_DMR(*(dynamic_cast*>(this->p_hamilt)->getHR())); + if(GlobalV::dm_to_rho) + { + std::string zipname = "output_DM0.npz"; + elecstate::DensityMatrix* dm + = dynamic_cast*>(this->pelec)->get_DM(); + this->read_mat_npz(zipname,*(dm->get_DMR_pointer(1))); + if(GlobalV::NSPIN == 2) + { + zipname = "output_DM1.npz"; + this->read_mat_npz(zipname,*(dm->get_DMR_pointer(2))); + } + + this->pelec->psiToRho(*this->psi); + + this->create_Output_Rho(0, istep).write(); + if(GlobalV::NSPIN == 2) + { + this->create_Output_Rho(1, istep).write(); + } + + return; + } + // the electron charge density should be symmetrized, // here is the initialization Symmetry_rho srho; diff --git a/source/module_esolver/io_npz.cpp b/source/module_esolver/io_npz.cpp new file mode 100644 index 0000000000..3415ae3004 --- /dev/null +++ b/source/module_esolver/io_npz.cpp @@ -0,0 +1,499 @@ +//Deals with io of dm(r)/h(r) in npz format + +#include "module_esolver/esolver_ks_lcao.h" + +#include "module_base/parallel_reduce.h" +#include "module_cell/module_neighbor/sltk_atom_arrange.h" +#include "module_cell/module_neighbor/sltk_grid_driver.h" +#include "module_hamilt_general/module_xc/xc_functional.h" +#include "module_hamilt_lcao/module_dftu/dftu.h" +#include "module_hamilt_pw/hamilt_pwdft/global.h" +#include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h" +#ifdef __DEEPKS +#include "module_hamilt_lcao/module_deepks/LCAO_deepks.h" //caoyu add 2021-07-26 +#endif +#include "module_base/timer.h" + +#ifdef __MPI +#include +#include "module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h" +#endif + +#ifdef __USECNPY +#include "cnpy.h" +#endif + +#include "module_base/element_name.h" + +namespace ModuleESolver +{ + +template +void ESolver_KS_LCAO::read_mat_npz(std::string& zipname, hamilt::HContainer& hR) +{ + ModuleBase::TITLE("LCAO_Hamilt","read_mat_npz"); + + const Parallel_Orbitals* paraV = this->LOWF.ParaV; + +#ifdef __USECNPY + +#ifdef __MPI + + if(GlobalV::NPROC!=1) + { + std::cout << "read_mat_npz is not supported in NPI mode yet" << std::endl; + return; + } + +/* + hamilt::HContainer* HR_serial; + Parallel_Orbitals serialV; + serialV.set_proc_dim(1,0); + serialV.mpi_create_cart(MPI_COMM_WORLD); + serialV.set_local2global(GlobalV::NLOCAL, GlobalV::NLOCAL, GlobalV::ofs_running, GlobalV::ofs_warning); + serialV.set_global2local(GlobalV::NLOCAL, GlobalV::NLOCAL, false, GlobalV::ofs_running); + serialV.set_atomic_trace(GlobalC::ucell.get_iat2iwt(), GlobalC::ucell.nat, GlobalV::NLOCAL); +*/ + if(GlobalV::MY_RANK == 0) + { + //HR_serial = new hamilt::HContainer(&serialV); + + cnpy::npz_t my_npz = cnpy::npz_load(zipname); + std::vector fnames; + + //check consistency + // 1. lattice vectors + double* lattice_vector = my_npz["lattice_vectors"].data(); + assert(std::abs(lattice_vector[0] - GlobalC::ucell.lat0 * GlobalC::ucell.a1.x) < 1e-6); + assert(std::abs(lattice_vector[1] - GlobalC::ucell.lat0 * GlobalC::ucell.a1.y) < 1e-6); + assert(std::abs(lattice_vector[2] - GlobalC::ucell.lat0 * GlobalC::ucell.a1.z) < 1e-6); + assert(std::abs(lattice_vector[3] - GlobalC::ucell.lat0 * GlobalC::ucell.a2.x) < 1e-6); + assert(std::abs(lattice_vector[4] - GlobalC::ucell.lat0 * GlobalC::ucell.a2.y) < 1e-6); + assert(std::abs(lattice_vector[5] - GlobalC::ucell.lat0 * GlobalC::ucell.a2.z) < 1e-6); + assert(std::abs(lattice_vector[6] - GlobalC::ucell.lat0 * GlobalC::ucell.a3.x) < 1e-6); + assert(std::abs(lattice_vector[7] - GlobalC::ucell.lat0 * GlobalC::ucell.a3.y) < 1e-6); + assert(std::abs(lattice_vector[8] - GlobalC::ucell.lat0 * GlobalC::ucell.a3.z) < 1e-6); + + // 2. atoms + double* atom_info = my_npz["atom_info"].data(); + for(int iat = 0; iat < GlobalC::ucell.nat; ++iat) + { + const int it = GlobalC::ucell.iat2it[iat]; + const int ia = GlobalC::ucell.iat2ia[iat]; + + //get atomic number (copied from write_cube.cpp) + std::string element = ""; + element = GlobalC::ucell.atoms[it].label; + std::string::iterator temp = element.begin(); + while (temp != element.end()) + { + if ((*temp >= '1') && (*temp <= '9')) + { + temp = element.erase(temp); + } + else + { + temp++; + } + } + int z = 0; + for(int j=0; j!=ModuleBase::element_name.size(); j++) + { + if (element == ModuleBase::element_name[j]) + { + z=j+1; + break; + } + } + + assert(atom_info[iat*5] == it); + assert(atom_info[iat*5+1] == z); + //I will not be checking the coordinates for now in case the direct coordinates provided in the + //npz file do not fall in the range [0,1); if a protocol is to be set in the future such that + //this could be guaranteed, then the following lines could be uncommented + //assert(std::abs(atom_info[iat*5+2] - GlobalC::ucell.atoms[it].taud[ia].x) < 1e-6); + //assert(std::abs(atom_info[iat*5+3] - GlobalC::ucell.atoms[it].taud[ia].y) < 1e-6); + //assert(std::abs(atom_info[iat*5+4] - GlobalC::ucell.atoms[it].taud[ia].z) < 1e-6); + } + + // 3. orbitals + for(int it = 0; it < GlobalC::ucell.ntype; ++it) + { + std::string filename="orbital_info_"+std::to_string(it); + double* orbital_info = my_npz[filename].data(); + for(int iw = 0; iw < GlobalC::ucell.atoms[it].nw; ++iw) + { + assert(orbital_info[iw*3] == GlobalC::ucell.atoms[it].iw2n[iw]); + assert(orbital_info[iw*3+1] == GlobalC::ucell.atoms[it].iw2l[iw]); + const int im = GlobalC::ucell.atoms[it].iw2m[iw]; + const int m = (im % 2 == 0) ? -im/2 : (im+1)/2; // copied from LCAO_gen_fixedH.cpp + assert(orbital_info[iw*3+2] == m); + } + } + + //starts reading the matrix + for(auto const& imap: my_npz) + fnames.push_back(imap.first); + + for(int i = 0; i < fnames.size(); i ++) + { + if(fnames[i].find("mat_") == std::string::npos) continue; + + std::vector tokens; + std::string token; + std::stringstream fname_tmp(fnames[i]); + char delimiter = '_'; + + while (std::getline(fname_tmp, token, delimiter)) { + tokens.push_back(token); + } + + cnpy::NpyArray arr = my_npz[fnames[i]]; + + int iat1 = std::stoi(tokens[1]); + int iat2 = std::stoi(tokens[2]); + int Rx = std::stoi(tokens[3]); + int Ry = std::stoi(tokens[4]); + int Rz = std::stoi(tokens[5]); + + int it1 = GlobalC::ucell.iat2it[iat1]; + int it2 = GlobalC::ucell.iat2it[iat2]; + + assert(arr.shape[0] == GlobalC::ucell.atoms[it1].nw); + assert(arr.shape[1] == GlobalC::ucell.atoms[it2].nw); + + //hamilt::AtomPair tmp(iat1,iat2,Rx,Ry,Rz,&serialV); + //HR_serial->insert_pair(tmp); + hamilt::AtomPair tmp(iat1,iat2,Rx,Ry,Rz,paraV); + hR.insert_pair(tmp); + + // use symmetry : H_{mu,nu,R} = H_{nu,mu,-R} + if(Rx!=0 || Ry!=0 || Rz!=0) + { + //hamilt::AtomPair tmp(iat2,iat1,-Rx,-Ry,-Rz,&serialV); + //HR_serial->insert_pair(tmp); + hamilt::AtomPair tmp(iat2,iat1,-Rx,-Ry,-Rz,paraV); + hR.insert_pair(tmp); + } + } + + //HR_serial->allocate(); + hR.allocate(); + + for(int i = 0; i < fnames.size(); i ++) + { + if(fnames[i].find("mat_") == std::string::npos) continue; + + std::vector tokens; + std::string token; + std::stringstream fname_tmp(fnames[i]); + char delimiter = '_'; + + while (std::getline(fname_tmp, token, delimiter)) { + tokens.push_back(token); + } + + cnpy::NpyArray arr = my_npz[fnames[i]]; + + int iat1 = std::stoi(tokens[1]); + int iat2 = std::stoi(tokens[2]); + int Rx = std::stoi(tokens[3]); + int Ry = std::stoi(tokens[4]); + int Rz = std::stoi(tokens[5]); + + int it1 = GlobalC::ucell.iat2it[iat1]; + int it2 = GlobalC::ucell.iat2it[iat2]; + + assert(arr.shape[0] == GlobalC::ucell.atoms[it1].nw); + assert(arr.shape[1] == GlobalC::ucell.atoms[it2].nw); + + double* submat_read = arr.data(); + + //hamilt::BaseMatrix* submat = HR_serial->find_matrix(iat1,iat2,Rx,Ry,Rz); + hamilt::BaseMatrix* submat = hR.find_matrix(iat1,iat2,Rx,Ry,Rz); + + for(int i = 0; i < arr.shape[0]; i ++) + { + for(int j = 0; j < arr.shape[1]; j ++) + { + submat->add_element(i,j,submat_read[i*arr.shape[1]+j]); + } + } + + // use symmetry : H_{mu,nu,R} = H_{nu,mu,-R} + if(Rx!=0 || Ry!=0 || Rz!=0) + { + //hamilt::BaseMatrix* submat = HR_serial->find_matrix(iat2,iat1,-Rx,-Ry,-Rz); + hamilt::BaseMatrix* submat = hR.find_matrix(iat2,iat1,-Rx,-Ry,-Rz); + for(int i = 0; i < arr.shape[0]; i ++) + { + for(int j = 0; j < arr.shape[1]; j ++) + { + submat->add_element(j,i,submat_read[i*arr.shape[1]+j]); + } + } + } + } + + } +#else + + cnpy::npz_t my_npz = cnpy::npz_load(zipname); + std::vector fnames; + + for(auto const& imap: my_npz) + fnames.push_back(imap.first); + + for(int i = 0; i < fnames.size(); i ++) + { + if(fnames[i].find("mat_") == std::string::npos) continue; + + std::vector tokens; + std::string token; + std::stringstream fname_tmp(fnames[i]); + char delimiter = '_'; + + while (std::getline(fname_tmp, token, delimiter)) { + tokens.push_back(token); + } + + cnpy::NpyArray arr = my_npz[fnames[i]]; + + int iat1 = std::stoi(tokens[1]); + int iat2 = std::stoi(tokens[2]); + int Rx = std::stoi(tokens[3]); + int Ry = std::stoi(tokens[4]); + int Rz = std::stoi(tokens[5]); + + int it1 = GlobalC::ucell.iat2it[iat1]; + int it2 = GlobalC::ucell.iat2it[iat2]; + + assert(arr.shape[0] == GlobalC::ucell.atoms[it1].nw); + assert(arr.shape[1] == GlobalC::ucell.atoms[it2].nw); + + hamilt::AtomPair tmp(iat1,iat2,Rx,Ry,Rz,paraV); + hR->insert_pair(tmp); + // use symmetry : H_{mu,nu,R} = H_{nu,mu,-R} + if(Rx!=0 || Ry!=0 || Rz!=0) + { + hamilt::AtomPair tmp(iat2,iat1,-Rx,-Ry,-Rz,paraV); + hR->insert_pair(tmp); + } + } + + hR->allocate(); + + for(int i = 0; i < fnames.size(); i ++) + { + if(fnames[i].find("mat_") == std::string::npos) continue; + + std::vector tokens; + std::string token; + std::stringstream fname_tmp(fnames[i]); + char delimiter = '_'; + + while (std::getline(fname_tmp, token, delimiter)) { + tokens.push_back(token); + } + + cnpy::NpyArray arr = my_npz[fnames[i]]; + + int iat1 = std::stoi(tokens[1]); + int iat2 = std::stoi(tokens[2]); + int Rx = std::stoi(tokens[3]); + int Ry = std::stoi(tokens[4]); + int Rz = std::stoi(tokens[5]); + + int it1 = GlobalC::ucell.iat2it[iat1]; + int it2 = GlobalC::ucell.iat2it[iat2]; + + assert(arr.shape[0] == GlobalC::ucell.atoms[it1].nw); + assert(arr.shape[1] == GlobalC::ucell.atoms[it2].nw); + + double* submat_read = arr.data(); + + hamilt::BaseMatrix* submat = hR->find_matrix(iat1,iat2,Rx,Ry,Rz); + + for(int i = 0; i < arr.shape[0]; i ++) + { + for(int j = 0; j < arr.shape[1]; j ++) + { + submat->add_element(i,j,submat_read[i*arr.shape[1]+j]); + } + } + + // use symmetry : H_{mu,nu,R} = H_{nu,mu,-R} + if(Rx!=0 || Ry!=0 || Rz!=0) + { + hamilt::BaseMatrix* submat = hR->find_matrix(iat2,iat1,-Rx,-Ry,-Rz); + for(int i = 0; i < arr.shape[0]; i ++) + { + for(int j = 0; j < arr.shape[1]; j ++) + { + submat->add_element(j,i,submat_read[i*arr.shape[1]+j]); + } + } + } + } + +#endif + +#endif +} + +template +void ESolver_KS_LCAO::output_mat_npz(std::string& zipname, const hamilt::HContainer& hR) +{ + ModuleBase::TITLE("LCAO_Hamilt","output_mat_npz"); + +#ifdef __USECNPY + std::string filename = ""; + + if(GlobalV::MY_RANK == 0) + { + +// first block: lattice vectors + filename = "lattice_vectors"; + std::vector lattice_vectors; + lattice_vectors.resize(9); + lattice_vectors[0] = GlobalC::ucell.lat0 * GlobalC::ucell.a1.x; + lattice_vectors[1] = GlobalC::ucell.lat0 * GlobalC::ucell.a1.y; + lattice_vectors[2] = GlobalC::ucell.lat0 * GlobalC::ucell.a1.z; + lattice_vectors[3] = GlobalC::ucell.lat0 * GlobalC::ucell.a2.x; + lattice_vectors[4] = GlobalC::ucell.lat0 * GlobalC::ucell.a2.y; + lattice_vectors[5] = GlobalC::ucell.lat0 * GlobalC::ucell.a2.z; + lattice_vectors[6] = GlobalC::ucell.lat0 * GlobalC::ucell.a3.x; + lattice_vectors[7] = GlobalC::ucell.lat0 * GlobalC::ucell.a3.y; + lattice_vectors[8] = GlobalC::ucell.lat0 * GlobalC::ucell.a3.z; + + cnpy::npz_save(zipname,filename,lattice_vectors); + +// second block: atom info + filename = "atom_info"; + double* atom_info = new double[GlobalC::ucell.nat*5]; + for(int iat = 0; iat < GlobalC::ucell.nat; ++iat) + { + const int it = GlobalC::ucell.iat2it[iat]; + const int ia = GlobalC::ucell.iat2ia[iat]; + + //get atomic number (copied from write_cube.cpp) + std::string element = ""; + element = GlobalC::ucell.atoms[it].label; + std::string::iterator temp = element.begin(); + while (temp != element.end()) + { + if ((*temp >= '1') && (*temp <= '9')) + { + temp = element.erase(temp); + } + else + { + temp++; + } + } + int z = 0; + for(int j=0; j!=ModuleBase::element_name.size(); j++) + { + if (element == ModuleBase::element_name[j]) + { + z=j+1; + break; + } + } + + atom_info[iat*5] = it; + atom_info[iat*5+1] = z; + atom_info[iat*5+2] = GlobalC::ucell.atoms[it].taud[ia].x; + atom_info[iat*5+3] = GlobalC::ucell.atoms[it].taud[ia].y; + atom_info[iat*5+4] = GlobalC::ucell.atoms[it].taud[ia].z; + } + std::vector shape={(size_t)GlobalC::ucell.nat,5}; + + cnpy::npz_save(zipname,filename,atom_info,shape,"a"); + delete[] atom_info; + +//third block: orbital info + for(int it = 0; it < GlobalC::ucell.ntype; ++it) + { + filename="orbital_info_"+std::to_string(it); + double* orbital_info = new double[GlobalC::ucell.atoms[it].nw*3]; + for(int iw = 0; iw < GlobalC::ucell.atoms[it].nw; ++iw) + { + orbital_info[iw*3] = GlobalC::ucell.atoms[it].iw2n[iw]; + orbital_info[iw*3+1] = GlobalC::ucell.atoms[it].iw2l[iw]; + const int im = GlobalC::ucell.atoms[it].iw2m[iw]; + const int m = (im % 2 == 0) ? -im/2 : (im+1)/2; // copied from LCAO_gen_fixedH.cpp + orbital_info[iw*3+2] = m; + } + shape={(size_t)GlobalC::ucell.atoms[it].nw,3}; + + cnpy::npz_save(zipname,filename,orbital_info,shape,"a"); + } + } + +//fourth block: hr(i0,jR) +#ifdef __MPI + hamilt::HContainer* HR_serial; + Parallel_Orbitals serialV; + serialV.set_global2local(GlobalV::NLOCAL, GlobalV::NLOCAL, false, GlobalV::ofs_running); + serialV.set_atomic_trace(GlobalC::ucell.get_iat2iwt(), GlobalC::ucell.nat, GlobalV::NLOCAL); + if(GlobalV::MY_RANK == 0) + { + HR_serial = new hamilt::HContainer(&serialV); + } + hamilt::gatherParallels(hR, HR_serial, 0); + + if(GlobalV::MY_RANK==0) + { + for(int iap=0;iap atom_j) continue; + int start_i = serialV.atom_begin_row[atom_i]; + int start_j = serialV.atom_begin_col[atom_j]; + int row_size = serialV.get_row_size(atom_i); + int col_size = serialV.get_col_size(atom_j); + for(int iR=0;iR shape = {(size_t)row_size,(size_t)col_size}; + cnpy::npz_save(zipname,filename,matrix.get_pointer(),shape,"a"); + } + } + + } +#else + const Parallel_Orbitals* paraV = this->LM->ParaV; + auto row_indexes = paraV->get_indexes_row(); + auto col_indexes = paraV->get_indexes_col(); + for(int iap=0;iapatom_begin_row[atom_i]; + int start_j = paraV->atom_begin_col[atom_j]; + int row_size = paraV->get_row_size(atom_i); + int col_size = paraV->get_col_size(atom_j); + for(int iR=0;iR shape = {(size_t)row_size,(size_t)col_size}; + cnpy::npz_save(zipname,filename,matrix.get_pointer(),shape,"a"); + } + } +#endif +#endif +} + +template class ESolver_KS_LCAO; +template class ESolver_KS_LCAO, double>; +template class ESolver_KS_LCAO, std::complex>; +} \ No newline at end of file diff --git a/source/module_io/input.cpp b/source/module_io/input.cpp index 60fc3ce47b..4fe9acac6d 100644 --- a/source/module_io/input.cpp +++ b/source/module_io/input.cpp @@ -346,6 +346,9 @@ void Input::Default(void) out_proj_band = 0; out_mat_hs = {0, 8}; out_mat_xc = 0; + out_hr_npz = 0; + out_dm_npz = 0; + dm_to_rho = 0; cal_syns = 0; dmax = 0.01; out_mat_hs2 = 0; // LiuXh add 2019-07-15 @@ -1435,6 +1438,18 @@ bool Input::Read(const std::string& fn) { read_bool(ifs, out_mat_xc); } + else if (strcmp("out_hr_npz", word) == 0) + { + read_bool(ifs, out_hr_npz); + } + else if (strcmp("out_dm_npz", word) == 0) + { + read_bool(ifs, out_dm_npz); + } + else if (strcmp("dm_to_rho", word) == 0) + { + read_bool(ifs, dm_to_rho); + } else if (strcmp("out_interval", word) == 0) { read_value(ifs, out_interval); @@ -2637,10 +2652,20 @@ bool Input::Read(const std::string& fn) gamma_only_local = 0; } } - if ((out_mat_r || out_mat_hs2 || out_mat_t || out_mat_dh) && gamma_only_local) + if ((out_mat_r || out_mat_hs2 || out_mat_t || out_mat_dh || out_hr_npz || out_dm_npz || dm_to_rho) && gamma_only_local) { ModuleBase::WARNING_QUIT("Input", - "printing of H(R)/S(R)/dH(R)/T(R) is not available for gamma only calculations"); + "printing of H(R)/S(R)/dH(R)/T(R)/DM(R) is not available for gamma only calculations"); + } + if(dm_to_rho && GlobalV::NPROC > 1) + { + ModuleBase::WARNING_QUIT("Input", "dm_to_rho is not available for parallel calculations"); + } + if(out_hr_npz || out_dm_npz || dm_to_rho) + { +#ifndef __USECNPY + ModuleBase::WARNING_QUIT("Input", "to write in npz format, please recompile with -DENABLE_CNPY=1"); +#endif } if (out_mat_dh && nspin == 4) { @@ -3411,6 +3436,9 @@ void Input::Bcast() Parallel_Common::bcast_bool(out_mat_t); Parallel_Common::bcast_bool(out_mat_dh); Parallel_Common::bcast_bool(out_mat_xc); + Parallel_Common::bcast_bool(out_hr_npz); + Parallel_Common::bcast_bool(out_dm_npz); + Parallel_Common::bcast_bool(dm_to_rho); Parallel_Common::bcast_bool(out_mat_r); // jingan add 2019-8-14 Parallel_Common::bcast_int(out_wfc_lcao); Parallel_Common::bcast_bool(out_alllog); diff --git a/source/module_io/input.h b/source/module_io/input.h index 857068d2da..a4f9f18895 100644 --- a/source/module_io/input.h +++ b/source/module_io/input.h @@ -271,6 +271,9 @@ class Input bool out_proj_band; // projected band structure calculation jiyy add 2022-05-11 std::vector out_mat_hs; // output H matrix and S matrix in local basis. bool out_mat_xc; // output exchange-correlation matrix in KS-orbital representation. + bool out_hr_npz;// output exchange-correlation matrix in KS-orbital representation. + bool out_dm_npz; + bool dm_to_rho; bool cal_syns; // calculate asynchronous S matrix to output double dmax; // maximum displacement of all atoms in one step (bohr) bool out_mat_hs2; // LiuXh add 2019-07-16, output H(R) matrix and S(R) matrix in local basis. diff --git a/source/module_io/input_conv.cpp b/source/module_io/input_conv.cpp index 229c517ef8..9a9dd1896b 100644 --- a/source/module_io/input_conv.cpp +++ b/source/module_io/input_conv.cpp @@ -684,6 +684,9 @@ void Input_Conv::Convert(void) hsolver::HSolverLCAO>::out_mat_t = INPUT.out_mat_t; hsolver::HSolverLCAO>::out_mat_dh = INPUT.out_mat_dh; GlobalV::out_mat_xc = INPUT.out_mat_xc; + GlobalV::out_hr_npz = INPUT.out_hr_npz; + GlobalV::out_dm_npz = INPUT.out_dm_npz; + GlobalV::dm_to_rho = INPUT.dm_to_rho; if (GlobalV::GAMMA_ONLY_LOCAL) { elecstate::ElecStateLCAO::out_wfc_lcao = INPUT.out_wfc_lcao; diff --git a/source/module_io/parameter_pool.cpp b/source/module_io/parameter_pool.cpp index 9be1fe00ac..a3852ccd6a 100644 --- a/source/module_io/parameter_pool.cpp +++ b/source/module_io/parameter_pool.cpp @@ -926,6 +926,18 @@ void input_parameters_set(std::map input_parameters { INPUT.out_mat_xc = *static_cast(input_parameters["out_mat_xc"].get()); } + else if (input_parameters.count("out_hr_npz") != 0) + { + INPUT.out_hr_npz = *static_cast(input_parameters["out_hr_npz"].get()); + } + else if (input_parameters.count("out_dm_npz") != 0) + { + INPUT.out_dm_npz = *static_cast(input_parameters["out_dm_npz"].get()); + } + else if (input_parameters.count("dm_to_rho") != 0) + { + INPUT.dm_to_rho = *static_cast(input_parameters["dm_to_rho"].get()); + } else if (input_parameters.count("cal_syns") != 0) { INPUT.cal_syns = *static_cast(input_parameters["cal_syns"].get()); diff --git a/source/module_io/test/input_conv_test.cpp b/source/module_io/test/input_conv_test.cpp index fac85bc51d..ea5d2f5266 100644 --- a/source/module_io/test/input_conv_test.cpp +++ b/source/module_io/test/input_conv_test.cpp @@ -154,6 +154,9 @@ TEST_F(InputConvTest, Conv) EXPECT_EQ(hsolver::HSolverLCAO::out_mat_dh, INPUT.out_mat_dh); EXPECT_EQ(hsolver::HSolverLCAO>::out_mat_dh, INPUT.out_mat_dh); EXPECT_EQ(GlobalV::out_mat_xc, false); + EXPECT_EQ(GlobalV::out_hr_npz, false); + EXPECT_EQ(GlobalV::out_dm_npz, false); + EXPECT_EQ(GlobalV::dm_to_rho, false); EXPECT_EQ(GlobalV::out_interval, 1); EXPECT_EQ(elecstate::ElecStateLCAO::out_wfc_lcao, false); EXPECT_EQ(berryphase::berry_phase_flag, false); diff --git a/source/module_io/test/write_input_test.cpp b/source/module_io/test/write_input_test.cpp index 8d3b8640df..8f9059a67e 100644 --- a/source/module_io/test/write_input_test.cpp +++ b/source/module_io/test/write_input_test.cpp @@ -324,6 +324,9 @@ TEST_F(write_input, LCAO5) testing::HasSubstr("lcao_rmax 30 #max R for 1D two-center integration table")); EXPECT_THAT(output, testing::HasSubstr("out_mat_hs 0 #output H and S matrix")); EXPECT_THAT(output, testing::HasSubstr("out_mat_xc 0 #output exchange-correlation matrix in KS-orbital representation")); + EXPECT_THAT(output, testing::HasSubstr("out_hr_npz 0 #output hr(I0,JR) submatrices in npz format")); + EXPECT_THAT(output, testing::HasSubstr("out_dm_npz 0 #output dmr(I0,JR) submatrices in npz format")); + EXPECT_THAT(output, testing::HasSubstr("dm_to_rho 0 #reads dmr in npz format and calculates electron density")); EXPECT_THAT(output, testing::HasSubstr("out_mat_hs2 0 #output H(R) and S(R) matrix")); EXPECT_THAT(output, testing::HasSubstr("out_mat_dh 0 #output of derivative of H(R) matrix")); EXPECT_THAT( diff --git a/source/module_io/write_input.cpp b/source/module_io/write_input.cpp index cfb7174508..5035a618b5 100644 --- a/source/module_io/write_input.cpp +++ b/source/module_io/write_input.cpp @@ -229,6 +229,9 @@ ModuleBase::GlobalFunc::OUTP(ofs, "out_bandgap", out_bandgap, "if true, print ou ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_hs2", out_mat_hs2, "output H(R) and S(R) matrix"); ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_dh", out_mat_dh, "output of derivative of H(R) matrix"); ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_xc", out_mat_xc, "output exchange-correlation matrix in KS-orbital representation"); + ModuleBase::GlobalFunc::OUTP(ofs, "out_hr_npz", out_hr_npz, "output hr(I0,JR) submatrices in npz format"); + ModuleBase::GlobalFunc::OUTP(ofs, "out_dm_npz", out_dm_npz, "output dmr(I0,JR) submatrices in npz format"); + ModuleBase::GlobalFunc::OUTP(ofs, "dm_to_rho", dm_to_rho, "reads dmr in npz format and calculates electron density"); ModuleBase::GlobalFunc::OUTP(ofs, "out_interval", out_interval, "interval for printing H(R) and S(R) matrix during MD"); ModuleBase::GlobalFunc::OUTP(ofs, "out_app_flag", out_app_flag, "whether output r(R), H(R), S(R), T(R), and dH(R) matrices in an append manner during MD"); ModuleBase::GlobalFunc::OUTP(ofs, "out_mat_t", out_mat_t, "output T(R) matrix"); From 579048d8b0b553657502fa43c1d4f1aaf4e2e387 Mon Sep 17 00:00:00 2001 From: Chun Cai Date: Tue, 30 Apr 2024 15:06:55 +0800 Subject: [PATCH 11/18] pin MPI version (#4065) --- Dockerfile.intel | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/Dockerfile.intel b/Dockerfile.intel index 39ca4c1431..269713a16c 100644 --- a/Dockerfile.intel +++ b/Dockerfile.intel @@ -16,18 +16,16 @@ RUN apt-get update && \ intel-oneapi-compiler-dpcpp-cpp \ intel-oneapi-compiler-fortran \ intel-oneapi-mkl-devel \ - intel-oneapi-mpi-devel \ + intel-oneapi-mpi-devel="2021.11.*" \ intel-oneapi-vtune - - -ENV I_MPI_ROOT='/opt/intel/oneapi/mpi/latest' \ +ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/latest \ LIBRARY_PATH=/opt/intel/oneapi/tbb/latest/env/../lib/intel64/gcc4.8:/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/mkl/latest/lib/:/opt/intel/oneapi/ippcp/latest/lib/:/opt/intel/oneapi/ipp/latest/lib:/opt/intel/oneapi/dpl/latest/lib:/opt/intel/oneapi/dnnl/latest/lib:/opt/intel/oneapi/dal/latest/lib:/opt/intel/oneapi/compiler/latest/lib:/opt/intel/oneapi/ccl/latest/lib/ \ LD_LIBRARY_PATH=/opt/intel/oneapi/tbb/latest/env/../lib/intel64/gcc4.8:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/mkl/latest/lib:/opt/intel/oneapi/itac/latest/slib:/opt/intel/oneapi/ippcp/latest/lib/:/opt/intel/oneapi/ipp/latest/lib:/opt/intel/oneapi/dpl/latest/lib:/opt/intel/oneapi/dnnl/latest/lib:/opt/intel/oneapi/debugger/latest/opt/debugger/lib:/opt/intel/oneapi/dal/latest/lib:/opt/intel/oneapi/compiler/latest/opt/oclfpga/host/linux64/lib:/opt/intel/oneapi/compiler/latest/opt/compiler/lib:/opt/intel/oneapi/compiler/latest/lib:/opt/intel/oneapi/ccl/latest/lib/ \ PATH=/opt/intel/oneapi/vtune/latest/bin64:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/latest/bin:/opt/intel/oneapi/mkl/latest/bin/:/opt/intel/oneapi/itac/latest/bin:/opt/intel/oneapi/inspector/latest/bin64:/opt/intel/oneapi/dpcpp-ct/latest/bin:/opt/intel/oneapi/dev-utilities/latest/bin:/opt/intel/oneapi/debugger/latest/opt/debugger/bin:/opt/intel/oneapi/compiler/latest/opt/oclfpga/bin:/opt/intel/oneapi/compiler/latest/bin:/opt/intel/oneapi/advisor/latest/bin64:/opt/mamba/bin:/opt/mamba/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \ - MKLROOT='/opt/intel/oneapi/mkl/latest' \ - FI_PROVIDER_PATH='/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric' \ - CMAKE_PREFIX_PATH='/opt/intel/oneapi/tbb/latest/env/..:/opt/intel/oneapi/mkl/latest/lib/cmake:/opt/intel/oneapi/ipp/latest/lib/cmake/ipp:/opt/intel/oneapi/dpl/latest/lib/cmake/oneDPL:/opt/intel/oneapi/dnnl/latest/lib/cmake:/opt/intel/oneapi/dal/latest:/opt/intel/oneapi/compiler/latest' \ - CMPLR_ROOT='/opt/intel/oneapi/compiler/latest' + MKLROOT=/opt/intel/oneapi/mkl/latest \ + FI_PROVIDER_PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric \ + CMAKE_PREFIX_PATH=/opt/intel/oneapi/tbb/latest/env/..:/opt/intel/oneapi/mkl/latest/lib/cmake:/opt/intel/oneapi/ipp/latest/lib/cmake/ipp:/opt/intel/oneapi/dpl/latest/lib/cmake/oneDPL:/opt/intel/oneapi/dnnl/latest/lib/cmake:/opt/intel/oneapi/dal/latest:/opt/intel/oneapi/compiler/latest \ + CMPLR_ROOT=/opt/intel/oneapi/compiler/latest SHELL ["/bin/bash", "-c"] ENV CC=mpiicx CXX=mpiicpx FC=mpiifx From a2a0e19082284742f54171cde77c2384ee689c63 Mon Sep 17 00:00:00 2001 From: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com> Date: Tue, 30 Apr 2024 16:10:14 +0800 Subject: [PATCH 12/18] add some docs (#4073) --- docs/advanced/input_files/input-main.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index f1345f04c4..993e250478 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -438,7 +438,7 @@ These variables are used to control general system parameters. - **Type**: Integer - **Description**: takes value 1, 0 or -1. - - -1: No symmetry will be considered. + - -1: No symmetry will be considered. It is recommended to set -1 for non-colinear + soc calculations, where time reversal symmetry is broken sometimes. - 0: Only time reversal symmetry would be considered in symmetry operations, which implied k point and -k point would be treated as a single k point with twice the weight. - 1: Symmetry analysis will be performed to determine the type of Bravais lattice and associated symmetry operations. (point groups, space groups, primitive cells, and irreducible k-points) - **Default**: From 1439426b631f1dccfbecb1806915e18b2c566722 Mon Sep 17 00:00:00 2001 From: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com> Date: Tue, 30 Apr 2024 17:02:18 +0800 Subject: [PATCH 13/18] Fix: use std::abs() instead of abs() (#4076) --- source/module_elecstate/module_charge/charge_mixing.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/module_elecstate/module_charge/charge_mixing.cpp b/source/module_elecstate/module_charge/charge_mixing.cpp index 57083301b2..e327904d0e 100755 --- a/source/module_elecstate/module_charge/charge_mixing.cpp +++ b/source/module_elecstate/module_charge/charge_mixing.cpp @@ -504,7 +504,7 @@ void Charge_Mixing::mix_rho_recip(Charge* chr) { chr->rhog[0][ig] = rhog_magabs[ig]; // rhog double norm = std::sqrt(chr->rho[1][ig] * chr->rho[1][ig] + chr->rho[2][ig] * chr->rho[2][ig] + chr->rho[3][ig] * chr->rho[3][ig]); - if (abs(norm) < 1e-10) continue; + if (std::abs(norm) < 1e-10) continue; double rescale_tmp = rho_magabs[npw + ig] / norm; chr->rho[1][ig] *= rescale_tmp; chr->rho[2][ig] *= rescale_tmp; From efb67a9725848f9f829b14af5c64b3cb1436be20 Mon Sep 17 00:00:00 2001 From: Taoni Bao Date: Sat, 4 May 2024 17:01:24 +0800 Subject: [PATCH 14/18] Refactor istate charge to reduce dependence on other modules (#4086) --- source/module_elecstate/fp_energy.cpp | 47 +++++-- source/module_elecstate/fp_energy.h | 11 +- .../module_esolver/esolver_ks_lcao_elec.cpp | 132 +++++++++--------- source/module_io/input.h | 9 ++ source/module_io/istate_charge.cpp | 120 +++++++++------- source/module_io/istate_charge.h | 38 +++-- 6 files changed, 206 insertions(+), 151 deletions(-) diff --git a/source/module_elecstate/fp_energy.cpp b/source/module_elecstate/fp_energy.cpp index 47ed45f400..767c18cebe 100644 --- a/source/module_elecstate/fp_energy.cpp +++ b/source/module_elecstate/fp_energy.cpp @@ -1,36 +1,38 @@ #include "fp_energy.h" + #include "module_base/global_variable.h" #ifdef USE_PAW #include "module_cell/module_paw/paw_cell.h" #endif +#include "module_base/tool_quit.h" + #include #include -#include "module_base/tool_quit.h" namespace elecstate { /// @brief calculate etot double fenergy::calculate_etot() { - if(GlobalV::use_paw) + if (GlobalV::use_paw) { - etot = eband + deband + etxc + ewald_energy - hartree_energy + demet + descf + exx + efield + gatefield - + evdw + esol_el + esol_cav + edftu + edeepks_scf; + etot = eband + deband + etxc + ewald_energy - hartree_energy + demet + descf + exx + efield + gatefield + evdw + + esol_el + esol_cav + edftu + edeepks_scf; } else { - etot = eband + deband + (etxc - etxcc) + ewald_energy + hartree_energy + demet + descf + exx + efield + gatefield - + evdw + esol_el + esol_cav + edftu + edeepks_scf + escon; + etot = eband + deband + (etxc - etxcc) + ewald_energy + hartree_energy + demet + descf + exx + efield + + gatefield + evdw + esol_el + esol_cav + edftu + edeepks_scf + escon; } #ifdef USE_PAW - if(GlobalV::use_paw) + if (GlobalV::use_paw) { double ecore = GlobalC::paw_cell.calculate_ecore(); double epawdc = GlobalC::paw_cell.get_epawdc(); - etot += ( ecore + epawdc ); + etot += (ecore + epawdc); } #endif return etot; @@ -39,22 +41,22 @@ double fenergy::calculate_etot() /// @brief calculate etot_harris double fenergy::calculate_harris() { - if(GlobalV::use_paw) + if (GlobalV::use_paw) { etot_harris = eband + deband_harris + etxc + ewald_energy - hartree_energy + demet + descf + exx + efield - + gatefield + evdw + esol_el + esol_cav + edftu + edeepks_scf; + + gatefield + evdw + esol_el + esol_cav + edftu + edeepks_scf; } else { - etot_harris = eband + deband_harris + (etxc - etxcc) + ewald_energy + hartree_energy + demet + descf + exx + efield - + gatefield + evdw + esol_el + esol_cav + edftu + edeepks_scf + escon; + etot_harris = eband + deband_harris + (etxc - etxcc) + ewald_energy + hartree_energy + demet + descf + exx + + efield + gatefield + evdw + esol_el + esol_cav + edftu + edeepks_scf + escon; } #ifdef USE_PAW - if(GlobalV::use_paw) + if (GlobalV::use_paw) { double ecore = GlobalC::paw_cell.calculate_ecore(); double epawdc = GlobalC::paw_cell.get_epawdc(); - etot_harris += ( ecore + epawdc ); + etot_harris += (ecore + epawdc); } #endif return etot_harris; @@ -64,7 +66,8 @@ double fenergy::calculate_harris() void fenergy::clear_all() { etot = etot_old = eband = deband = etxc = etxcc = vtxc = ewald_energy = hartree_energy = demet = descf = exx - = efield = gatefield = evdw = etot_harris = deband_harris = esol_el = esol_cav = edftu = edeepks_scf = escon = 0.0; + = efield = gatefield = evdw = etot_harris = deband_harris = esol_el = esol_cav = edftu = edeepks_scf = escon + = 0.0; } /// @brief print all energies @@ -140,4 +143,18 @@ double efermi::get_efval(const int& is) const } } +/// @brief get all fermi energies for all spins +/// @return all fermi energies for all spins +std::vector efermi::get_all_ef() const +{ + if (two_efermi) + { + return {ef_up, ef_dw}; + } + else + { + return {ef, ef}; // For NSPIN=1, ef_up=ef_dw=ef + } +} + } // namespace elecstate diff --git a/source/module_elecstate/fp_energy.h b/source/module_elecstate/fp_energy.h index 4d3e8d4ec3..9c922a3276 100644 --- a/source/module_elecstate/fp_energy.h +++ b/source/module_elecstate/fp_energy.h @@ -1,3 +1,5 @@ +#include + /** * @file fp_energy.h * @brief This file contains all energies about first-principle calculations @@ -12,8 +14,8 @@ namespace elecstate */ struct fenergy { - double etot = 0.0; ///< the total free energy - double etot_old = 0.0; ///< old total free energy + double etot = 0.0; ///< the total free energy + double etot_old = 0.0; ///< old total free energy double etot_delta = 0.0; // the difference of total energy between two steps = etot - etot_old double eband = 0.0; ///< the band energy @@ -42,8 +44,8 @@ struct fenergy double escon = 0.0; ///< spin constraint energy - double ekinetic = 0.0; /// kinetic energy, used in OFDFT - double eion_elec = 0.0; /// ion-electron interaction energy, used in OFDFT + double ekinetic = 0.0; /// kinetic energy, used in OFDFT + double eion_elec = 0.0; /// ion-electron interaction energy, used in OFDFT double calculate_etot(); double calculate_harris(); @@ -63,6 +65,7 @@ struct efermi bool two_efermi = false; ///< double& get_ef(const int& is); double get_efval(const int& is) const; + std::vector get_all_ef() const; }; } // namespace elecstate diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp index 3646858a49..aa8e53bd7c 100644 --- a/source/module_esolver/esolver_ks_lcao_elec.cpp +++ b/source/module_esolver/esolver_ks_lcao_elec.cpp @@ -19,11 +19,10 @@ #include "module_elecstate/elecstate_lcao.h" #include "module_hamilt_general/module_ewald/H_Ewald_pw.h" #include "module_hamilt_general/module_vdw/vdw.h" +#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h" #include "module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.h" -#include "module_io/dm_io.h" - #include "module_hamilt_lcao/module_deltaspin/spin_constrain.h" -#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h" +#include "module_io/dm_io.h" namespace ModuleESolver { @@ -102,8 +101,8 @@ void ESolver_KS_LCAO::beforesolver(const int istep) // init psi if (this->psi == nullptr) { - int nsk=0; - int ncol=0; + int nsk = 0; + int ncol = 0; if (GlobalV::GAMMA_ONLY_LOCAL) { nsk = GlobalV::NSPIN; @@ -127,12 +126,7 @@ void ESolver_KS_LCAO::beforesolver(const int istep) } // prepare grid in Gint - LCAO_domain::grid_prepare( - this->GridT, - this->GG, - this->GK, - *this->pw_rho, - *this->pw_big); + LCAO_domain::grid_prepare(this->GridT, this->GG, this->GK, *this->pw_rho, *this->pw_big); // init Hamiltonian if (this->p_hamilt != nullptr) @@ -143,7 +137,8 @@ void ESolver_KS_LCAO::beforesolver(const int istep) if (this->p_hamilt == nullptr) { elecstate::DensityMatrix* DM = dynamic_cast*>(this->pelec)->get_DM(); - this->p_hamilt = new hamilt::HamiltLCAO(GlobalV::GAMMA_ONLY_LOCAL ? &(this->GG) : nullptr, + this->p_hamilt = new hamilt::HamiltLCAO( + GlobalV::GAMMA_ONLY_LOCAL ? &(this->GG) : nullptr, GlobalV::GAMMA_ONLY_LOCAL ? nullptr : &(this->GK), &(this->gen_h), &(this->LM), @@ -270,12 +265,12 @@ void ESolver_KS_LCAO::beforesolver(const int istep) //========================================================= // cal_ux should be called before init_scf because // the direction of ux is used in noncoline_rho - //========================================================= - if(GlobalV::NSPIN == 4 && GlobalV::DOMAG) - { - GlobalC::ucell.cal_ux(); - } - ModuleBase::timer::tick("ESolver_KS_LCAO", "beforesolver"); + //========================================================= + if (GlobalV::NSPIN == 4 && GlobalV::DOMAG) + { + GlobalC::ucell.cal_ux(); + } + ModuleBase::timer::tick("ESolver_KS_LCAO", "beforesolver"); } template @@ -311,15 +306,15 @@ void ESolver_KS_LCAO::before_scf(int istep) this->beforesolver(istep); // Peize Lin add 2016-12-03 -#ifdef __EXX // set xc type before the first cal of xc in pelec->init_scf - if (GlobalC::exx_info.info_ri.real_number) - { - this->exd->exx_beforescf(this->kv, *this->p_chgmix); - } - else - { - this->exc->exx_beforescf(this->kv, *this->p_chgmix); - } +#ifdef __EXX // set xc type before the first cal of xc in pelec->init_scf + if (GlobalC::exx_info.info_ri.real_number) + { + this->exd->exx_beforescf(this->kv, *this->p_chgmix); + } + else + { + this->exc->exx_beforescf(this->kv, *this->p_chgmix); + } #endif // __EXX this->pelec->init_scf(istep, this->sf.strucFac); @@ -329,22 +324,22 @@ void ESolver_KS_LCAO::before_scf(int istep) ->get_DM() ->init_DMR(*(dynamic_cast*>(this->p_hamilt)->getHR())); - if(GlobalV::dm_to_rho) + if (GlobalV::dm_to_rho) { std::string zipname = "output_DM0.npz"; elecstate::DensityMatrix* dm = dynamic_cast*>(this->pelec)->get_DM(); - this->read_mat_npz(zipname,*(dm->get_DMR_pointer(1))); - if(GlobalV::NSPIN == 2) + this->read_mat_npz(zipname, *(dm->get_DMR_pointer(1))); + if (GlobalV::NSPIN == 2) { zipname = "output_DM1.npz"; - this->read_mat_npz(zipname,*(dm->get_DMR_pointer(2))); + this->read_mat_npz(zipname, *(dm->get_DMR_pointer(2))); } this->pelec->psiToRho(*this->psi); this->create_Output_Rho(0, istep).write(); - if(GlobalV::NSPIN == 2) + if (GlobalV::NSPIN == 2) { this->create_Output_Rho(1, istep).write(); } @@ -358,7 +353,7 @@ void ESolver_KS_LCAO::before_scf(int istep) for (int is = 0; is < GlobalV::NSPIN; is++) { srho.begin(is, *(this->pelec->charge), this->pw_rho, GlobalC::Pgrid, GlobalC::ucell.symm); - } + } // 1. calculate ewald energy. // mohan update 2021-02-25 @@ -423,11 +418,20 @@ void ESolver_KS_LCAO::others(const int istep) { IState_Charge ISC(this->psi, this->LOC); ISC.begin(this->GG, - this->pelec, - this->pw_rho, - this->pw_big, + this->pelec->charge->rho, + this->pelec->wg, + this->pelec->eferm.get_all_ef(), + this->pw_rho->nrxx, + this->pw_rho->nplane, + this->pw_rho->startz_current, + this->pw_rho->nx, + this->pw_rho->ny, + this->pw_rho->nz, + this->pw_big->bz, + this->pw_big->nbz, GlobalV::GAMMA_ONLY_LOCAL, GlobalV::NBANDS_ISTATE, + INPUT.get_out_band_kb(), GlobalV::NBANDS, GlobalV::nelec, GlobalV::NSPIN, @@ -481,7 +485,6 @@ void ESolver_KS_LCAO::others(const int istep) return; } - template <> void ESolver_KS_LCAO::get_S(void) { @@ -489,7 +492,6 @@ void ESolver_KS_LCAO::get_S(void) ModuleBase::WARNING_QUIT("ESolver_KS_LCAO::get_S", "not implemented for"); } - template <> void ESolver_KS_LCAO, double>::get_S(void) { @@ -523,7 +525,6 @@ void ESolver_KS_LCAO, double>::get_S(void) return; } - template <> void ESolver_KS_LCAO, std::complex>::get_S(void) { @@ -546,8 +547,7 @@ void ESolver_KS_LCAO, std::complex>::get_S(void) this->LM.ParaV = &this->orb_con.ParaV; if (this->p_hamilt == nullptr) { - this->p_hamilt - = new hamilt::HamiltLCAO, std::complex>(&this->LM, this->kv); + this->p_hamilt = new hamilt::HamiltLCAO, std::complex>(&this->LM, this->kv); dynamic_cast, std::complex>*>(this->p_hamilt->ops) ->contributeHR(); } @@ -557,7 +557,6 @@ void ESolver_KS_LCAO, std::complex>::get_S(void) return; } - template void ESolver_KS_LCAO::nscf(void) { @@ -661,36 +660,31 @@ void ESolver_KS_LCAO::nscf(void) #ifdef __LCAO if (INPUT.wannier_method == 1) { - toWannier90_LCAO_IN_PW myWannier( - INPUT.out_wannier_mmn, - INPUT.out_wannier_amn, - INPUT.out_wannier_unk, - INPUT.out_wannier_eig, - INPUT.out_wannier_wvfn_formatted, - INPUT.nnkpfile, - INPUT.wannier_spin - ); - - myWannier.calculate( - this->pelec->ekb, - this->pw_wfc, - this->pw_big, - this->sf, - this->kv, - this->psi, - this->LOWF.ParaV); + toWannier90_LCAO_IN_PW myWannier(INPUT.out_wannier_mmn, + INPUT.out_wannier_amn, + INPUT.out_wannier_unk, + INPUT.out_wannier_eig, + INPUT.out_wannier_wvfn_formatted, + INPUT.nnkpfile, + INPUT.wannier_spin); + + myWannier.calculate(this->pelec->ekb, + this->pw_wfc, + this->pw_big, + this->sf, + this->kv, + this->psi, + this->LOWF.ParaV); } else if (INPUT.wannier_method == 2) { - toWannier90_LCAO myWannier( - INPUT.out_wannier_mmn, - INPUT.out_wannier_amn, - INPUT.out_wannier_unk, - INPUT.out_wannier_eig, - INPUT.out_wannier_wvfn_formatted, - INPUT.nnkpfile, - INPUT.wannier_spin - ); + toWannier90_LCAO myWannier(INPUT.out_wannier_mmn, + INPUT.out_wannier_amn, + INPUT.out_wannier_unk, + INPUT.out_wannier_eig, + INPUT.out_wannier_wvfn_formatted, + INPUT.nnkpfile, + INPUT.wannier_spin); myWannier.calculate(this->pelec->ekb, this->kv, *(this->psi), this->LOWF.ParaV); } diff --git a/source/module_io/input.h b/source/module_io/input.h index a4f9f18895..4bfbf37be3 100644 --- a/source/module_io/input.h +++ b/source/module_io/input.h @@ -9,6 +9,7 @@ #include "module_base/vector3.h" #include "module_md/md_para.h" +#include "input_conv.h" class Input { @@ -704,10 +705,18 @@ class Input void read_bool(std::ifstream &ifs, bool &var); // Return the const string pointer of private member bands_to_print_ + // Not recommended to use this function directly, use get_out_band_kb() instead const std::string* get_bands_to_print() const { return &bands_to_print_; } + // Return parsed bands_to_print_ as a vector of integers + std::vector get_out_band_kb() const + { + std::vector out_band_kb; + Input_Conv::parse_expression(bands_to_print_, out_band_kb); + return out_band_kb; + } }; extern Input INPUT; diff --git a/source/module_io/istate_charge.cpp b/source/module_io/istate_charge.cpp index 8fe4a170d5..f4bab942e4 100644 --- a/source/module_io/istate_charge.cpp +++ b/source/module_io/istate_charge.cpp @@ -6,7 +6,6 @@ #include "module_base/parallel_common.h" #include "module_base/scalapack_connector.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" -#include "module_io/input_conv.h" #include "module_io/rho_io.h" IState_Charge::IState_Charge(psi::Psi* psi_gamma_in, Local_Orbital_Charge& loc_in) @@ -19,11 +18,20 @@ IState_Charge::~IState_Charge() } void IState_Charge::begin(Gint_Gamma& gg, - elecstate::ElecState* pelec, - const ModulePW::PW_Basis* rhopw, - const ModulePW::PW_Basis_Big* bigpw, + double** rho, + const ModuleBase::matrix& wg, + const std::vector& ef_all_spin, + const int rhopw_nrxx, + const int rhopw_nplane, + const int rhopw_startz_current, + const int rhopw_nx, + const int rhopw_ny, + const int rhopw_nz, + const int bigpw_bz, + const int bigpw_nbz, const bool gamma_only_local, const int nbands_istate, + const std::vector& out_band_kb, const int nbands, const double nelec, const int nspin, @@ -41,17 +49,14 @@ void IState_Charge::begin(Gint_Gamma& gg, ModuleBase::WARNING_QUIT("IState_Charge::begin", "Only available for GAMMA_ONLY_LOCAL now."); } - // Get bands_to_print through public function of INPUT (returns a const pointer to string) - std::string bands_to_print = *INPUT.get_bands_to_print(); - int mode = 0; - if (nbands_istate > 0 && bands_to_print.empty()) + if (nbands_istate > 0 && static_cast(out_band_kb.size()) == 0) { mode = 1; } - else if (!bands_to_print.empty()) + else if (static_cast(out_band_kb.size()) > 0) { - // If bands_to_print is not empty, set mode to 2 + // If out_band_kb (bands_to_print) is not empty, set mode to 2 mode = 2; std::cout << " Notice: INPUT parameter `nbands_istate` overwritten by `bands_to_print`!" << std::endl; } @@ -63,8 +68,6 @@ void IState_Charge::begin(Gint_Gamma& gg, int fermi_band = 0; int bands_below = 0; int bands_above = 0; - std::vector out_band_kb; - Input_Conv::parse_expression(bands_to_print, out_band_kb); // (2) cicle: // (2.1) calculate the selected density matrix from wave functions. @@ -124,14 +127,13 @@ void IState_Charge::begin(Gint_Gamma& gg, "The elements of `bands_to_print` must be either 0 or 1. Invalid values found!"); } } - // Fill bands_picked_ with values from out_band_kb, converting to int + // Fill bands_picked_ with values from out_band_kb // Remaining bands are already set to 0 int length = std::min(static_cast(out_band_kb.size()), nbands); for (int i = 0; i < length; ++i) { // out_band_kb rely on function parse_expression from input_conv.cpp - // Initially designed for ocp_set, which can be double - bands_picked_[i] = static_cast(out_band_kb[i]); + bands_picked_[i] = out_band_kb[i]; } std::cout << " Plot band decomposed charge density below the Fermi surface: band "; @@ -197,44 +199,64 @@ void IState_Charge::begin(Gint_Gamma& gg, // band, whenever it is occupied or not. #ifdef __MPI - this->idmatrix(ib, pelec, nspin, nelec, nlocal); + this->idmatrix(ib, nspin, nelec, nlocal, wg); #endif // (2) zero out of charge density array. for (int is = 0; is < nspin; ++is) { - ModuleBase::GlobalFunc::ZEROS(pelec->charge->rho[is], rhopw->nrxx); + ModuleBase::GlobalFunc::ZEROS(rho[is], rhopw_nrxx); } // (3) calculate charge density for a particular // band. - Gint_inout inout(this->loc->DM, pelec->charge->rho, Gint_Tools::job_type::rho); + Gint_inout inout(this->loc->DM, rho, Gint_Tools::job_type::rho); gg.cal_gint(&inout); - pelec->charge->save_rho_before_sum_band(); // xiaohui add 2014-12-09 + + // A solution to replace the original implementation of the following code: + // pelec->charge->save_rho_before_sum_band(); + double** rho_save = new double*[nspin]; // Initialize an array of pointers + for (int is = 0; is < nspin; is++) + { + rho_save[is] = new double[rhopw_nrxx]; // Allocate memory for each internal array + ModuleBase::GlobalFunc::DCOPY(rho[is], rho_save[is], + rhopw_nrxx); // Copy data after allocation + } + std::stringstream ssc; ssc << global_out_dir << "BAND" << ib + 1; // 0 means definitely output charge density. for (int is = 0; is < nspin; ++is) { ssc << "_SPIN" << is << "_CHG.cube"; - const double ef_tmp = pelec->eferm.get_efval(is); + + // Use a const vector to store efermi for all spins, replace the original implementation: + // const double ef_tmp = pelec->eferm.get_efval(is); + double ef_spin = ef_all_spin[is]; ModuleIO::write_rho( #ifdef __MPI - bigpw->bz, - bigpw->nbz, - rhopw->nplane, - rhopw->startz_current, + bigpw_bz, + bigpw_nbz, + rhopw_nplane, + rhopw_startz_current, #endif - pelec->charge->rho_save[is], + rho_save[is], is, nspin, 0, ssc.str(), - rhopw->nx, - rhopw->ny, - rhopw->nz, - ef_tmp, + rhopw_nx, + rhopw_ny, + rhopw_nz, + ef_spin, &(GlobalC::ucell)); } + + // Release memory of rho_save + for (int is = 0; is < nspin; is++) + { + delete[] rho_save[is]; // Release memory of each internal array + } + delete[] rho_save; // Release memory of the array of pointers } } @@ -243,54 +265,48 @@ void IState_Charge::begin(Gint_Gamma& gg, #ifdef __MPI void IState_Charge::idmatrix(const int& ib, - elecstate::ElecState* pelec, const int nspin, const double nelec, - const int nlocal) + const int nlocal, + const ModuleBase::matrix& wg) { ModuleBase::TITLE("IState_Charge", "idmatrix"); + assert(wg.nr == nspin); - assert(pelec->wg.nr == nspin); - for (int is = 0; is != nspin; ++is) + int fermi_band = static_cast((nelec + 1) / 2 + 1.0e-8); + + for (int is = 0; is < nspin; ++is) { std::vector wg_local(this->loc->ParaV->ncol, 0.0); const int ib_local = this->loc->ParaV->global2local_col(ib); - int fermi_band = 0; - fermi_band = static_cast((nelec + 1) / 2 + 1.0e-8); - if (ib_local >= 0) { - if (ib < fermi_band) - { - wg_local[ib_local] = pelec->wg(is, ib); - } - else - { - wg_local[ib_local] = pelec->wg(is, fermi_band - 1); - } // unoccupied bands, use occupation of homo + // For unoccupied bands, use occupation of HOMO + wg_local[ib_local] = (ib < fermi_band) ? wg(is, ib) : wg(is, fermi_band - 1); } - // wg_wfc(ib,iw) = pelec->wg[ib] * wfc(ib,iw); + // wg_wfc(ib,iw) = wg[ib] * wfc(ib,iw); this->psi_gamma->fix_k(is); - psi::Psi wg_wfc(this->psi_gamma[0], 1); + psi::Psi wg_wfc(*this->psi_gamma, 1); - for (int ir = 0; ir != wg_wfc.get_nbands(); ++ir) + for (int ir = 0; ir < wg_wfc.get_nbands(); ++ir) { BlasConnector::scal(wg_wfc.get_nbasis(), wg_local[ir], wg_wfc.get_pointer() + ir * wg_wfc.get_nbasis(), 1); } - // C++: dm(iw1,iw2) = wfc(ib,iw1).T * wg_wfc(ib,iw2) + // dm(iw1,iw2) = wfc(ib,iw1).T * wg_wfc(ib,iw2) const double one_float = 1.0, zero_float = 0.0; const int one_int = 1; const char N_char = 'N', T_char = 'T'; + this->loc->dm_gamma.at(is).create(wg_wfc.get_nbands(), wg_wfc.get_nbasis()); pdgemm_(&N_char, &T_char, &nlocal, &nlocal, - &pelec->wg.nc, + &wg.nc, &one_float, wg_wfc.get_pointer(), &one_int, @@ -307,10 +323,8 @@ void IState_Charge::idmatrix(const int& ib, this->loc->ParaV->desc); } - std::cout << " finished calc dm_2d : " << std::endl; - + std::cout << " Finished calculating dm_2d." << std::endl; this->loc->cal_dk_gamma_from_2D_pub(); - - std::cout << " finished convert : " << std::endl; + std::cout << " Finished converting dm_2d to dk_gamma." << std::endl; } #endif diff --git a/source/module_io/istate_charge.h b/source/module_io/istate_charge.h index bb525aad8d..dfd0c7f41d 100644 --- a/source/module_io/istate_charge.h +++ b/source/module_io/istate_charge.h @@ -1,16 +1,15 @@ #ifndef ISTATE_CHARGE_H #define ISTATE_CHARGE_H -#include -#include - -#include -#include - #include "module_basis/module_pw/pw_basis.h" #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h" #include "module_hamilt_lcao/module_gint/gint_gamma.h" #include "module_psi/psi.h" +#include +#include +#include +#include + /** * @brief Manages the computation of the charge density for different bands. * @@ -31,11 +30,20 @@ class IState_Charge ~IState_Charge(); void begin(Gint_Gamma& gg, - elecstate::ElecState* pelec, - const ModulePW::PW_Basis* rhopw, - const ModulePW::PW_Basis_Big* bigpw, + double** rho, + const ModuleBase::matrix& wg, + const std::vector& ef_all_spin, + const int rhopw_nrxx, + const int rhopw_nplane, + const int rhopw_startz_current, + const int rhopw_nx, + const int rhopw_ny, + const int rhopw_nz, + const int bigpw_bz, + const int bigpw_nbz, const bool gamma_only_local, const int nbands_istate, + const std::vector& out_band_kb, const int nbands, const double nelec, const int nspin, @@ -50,8 +58,18 @@ class IState_Charge #ifdef __MPI /** * @brief Calculates the density matrix for a given band and spin. + * + * This method calculates the density matrix for a given band and spin using the wave function coefficients. + * It adjusts the coefficients based on the Fermi energy and performs a matrix multiplication to produce the density + * matrix. + * + * @param ib Band index. + * @param nspin Number of spin channels. + * @param nelec Total number of electrons. + * @param nlocal Number of local orbitals. + * @param wg Weight matrix for bands and spins. */ - void idmatrix(const int& ib, elecstate::ElecState* pelec, const int nspin, const double nelec, const int nlocal); + void idmatrix(const int& ib, const int nspin, const double nelec, const int nlocal, const ModuleBase::matrix& wg); #endif psi::Psi* psi_gamma; Local_Orbital_Charge* loc; From 9fc5728fa179abf81a6941dd1984514db5e92e37 Mon Sep 17 00:00:00 2001 From: LUNASEA <33978601+maki49@users.noreply.github.com> Date: Sat, 4 May 2024 17:55:36 +0800 Subject: [PATCH 15/18] Feature: write local-XC and EXX part of band(orbital) energy along with the XC matrix (#4013) * write local-XC and EXX part of band(orbital) energy with XC matrix * meet @mohanchen's requirements * xc orbital energy: change to LibRPA format --- docs/advanced/input_files/input-main.md | 2 + source/module_io/write_Vxc.hpp | 94 +++++++++++++++++++---- tests/integrate/207_NO_KP_OXC/INPUT | 10 +++ tests/integrate/207_NO_KP_OXC/KPT | 2 +- tests/integrate/207_NO_KP_OXC/k-1-Vxc.ref | 12 +-- tests/integrate/207_NO_KP_OXC/result.ref | 6 +- tests/integrate/207_NO_KP_OXC/vxc_out.ref | 15 ++++ tests/integrate/307_NO_GO_OXC/vxc_out.ref | 9 +++ tests/integrate/tools/catch_properties.sh | 17 ++-- 9 files changed, 134 insertions(+), 33 deletions(-) create mode 100644 tests/integrate/207_NO_KP_OXC/vxc_out.ref create mode 100644 tests/integrate/307_NO_GO_OXC/vxc_out.ref diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index 993e250478..167715485a 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -1605,6 +1605,8 @@ These variables are used to control the output of properties. - **Type**: Boolean - **Availability**: Numerical atomic orbital basis - **Description**: Whether to print the upper triangular part of the exchange-correlation matrices in **Kohn-Sham orbital representation** (unit: Ry): $\braket{\psi_i|V_\text{xc}^\text{(semi-)local}+V_\text{exx}+V_\text{DFTU}|\psi_j}$ for each k point into files in the directory `OUT.${suffix}`, which is useful for the subsequent GW calculation. (Note that currently DeePKS term is not included. ) The files are named `k-$k-Vxc`, the meaning of `$k`corresponding to k point and spin is same as [hs_matrix.md](../elec_properties/hs_matrix.md#out_mat_hs). +The band (KS orbital) energy for each (k-point, spin, band) will be printed in the file `OUT.${suffix}/vxc_out`. If EXX is calculated, the local and EXX part of band energy will also be printed in `OUT.${suffix}/vxc_local_out`and `OUT.${suffix}/vxc_exx_out`, respectively. All the `vxc*_out` files contains 3 integers (nk, nspin, nband) followed by nk\*nspin\*nband lines of energy Hartree and eV. +``` - **Default**: False ### out_hr_npz/out_dm_npz diff --git a/source/module_io/write_Vxc.hpp b/source/module_io/write_Vxc.hpp index b9f5c435b9..236ccafb77 100644 --- a/source/module_io/write_Vxc.hpp +++ b/source/module_io/write_Vxc.hpp @@ -149,6 +149,29 @@ namespace ModuleIO return e; } + template + std::vector orbital_energy( + const int ik, + const int nbands, + const std::vector& mat_mo, + const Parallel_2D& p2d) + { +#ifdef __DEBUG + assert(nbands >= 0); +#endif + std::vector e(nbands, 0.0); + for (int i = 0;i < nbands;++i) + { + if (p2d.in_this_processor(i, i)) + { + const int index = p2d.global2local_col(i) * p2d.get_row_size() + p2d.global2local_row(i); + e[i] = get_real(mat_mo[index]); + } + } + Parallel_Reduce::reduce_all(e.data(), nbands); + return e; + } + // mohan update 2024-04-01 template void set_gint_pointer( @@ -244,12 +267,13 @@ namespace ModuleIO GlobalV::CURRENT_SPIN = is; //caution: Veff::contributeHR depends on GlobalV::CURRENT_SPIN vxcs_op_ao[is]->contributeHR(); } + std::vector> e_orb_locxc; // orbital energy (local XC) + std::vector> e_orb_tot; // orbital energy (total) #ifdef __EXX hamilt::OperatorEXX> vexx_op_ao(&lm, nullptr, &vxc_k_ao, kv); - // ======test======= - // std::vector test_vexxonly_k_ao(pv->nloc); - // hamilt::OperatorEXX> test_vexxonly_op_ao(&lm, nullptr, &test_vexxonly_k_ao, kv); - // ======test======= + std::vector vexxonly_k_ao(pv->nloc); + hamilt::OperatorEXX> vexxonly_op_ao(&lm, nullptr, &vexxonly_k_ao, kv); + std::vector> e_orb_exx; // orbital energy (EXX) #endif hamilt::OperatorDFTU> vdftu_op_ao(&lm, kv.kvec_d, nullptr, &vxc_k_ao, kv.isk); @@ -266,29 +290,34 @@ namespace ModuleIO ModuleBase::GlobalFunc::ZEROS(vxc_k_ao.data(), pv->nloc); int is = GlobalV::CURRENT_SPIN = kv.isk[ik]; dynamic_cast*>(vxcs_op_ao[is])->contributeHk(ik); + const std::vector& vlocxc_k_mo = cVc(vxc_k_ao.data(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d); + #ifdef __EXX - if (GlobalC::exx_info.info_global.cal_exx) - { - vexx_op_ao.contributeHk(ik); - } + if (GlobalC::exx_info.info_global.cal_exx) + { + e_orb_locxc.emplace_back(orbital_energy(ik, nbands, vlocxc_k_mo, p2d)); + ModuleBase::GlobalFunc::ZEROS(vexxonly_k_ao.data(), pv->nloc); + vexx_op_ao.contributeHk(ik); + vexxonly_op_ao.contributeHk(ik); + std::vector vexx_k_mo = cVc(vexxonly_k_ao.data(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d); + e_orb_exx.emplace_back(orbital_energy(ik, nbands, vexx_k_mo, p2d)); + } // ======test======= - // ModuleBase::GlobalFunc::ZEROS(test_vexxonly_k_ao.data(), pv->nloc); - // if (GlobalC::exx_info.info_global.cal_exx) test_vexxonly_op_ao.contributeHk(ik); - // std::vector test_vexxonly_k_mo = cVc(test_vexxonly_k_ao.data(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d); - // exx_energy += all_band_energy(ik, test_vexxonly_k_mo, p2d, wg); + // exx_energy += all_band_energy(ik, vexx_k_mo, p2d, wg); // ======test======= #endif if (GlobalV::dft_plus_u) { vdftu_op_ao.contributeHk(ik); } - std::vector vxc_k_mo = cVc(vxc_k_ao.data(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d); + const std::vector& vxc_tot_k_mo = cVc(vxc_k_ao.data(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d); + e_orb_tot.emplace_back(orbital_energy(ik, nbands, vxc_tot_k_mo, p2d)); // write - ModuleIO::save_mat(-1, vxc_k_mo.data(), nbands, + ModuleIO::save_mat(-1, vxc_tot_k_mo.data(), nbands, false/*binary*/, GlobalV::out_ndigits, true/*triangle*/, false/*append*/, "Vxc", "k-" + std::to_string(ik), p2d, drank); // ======test======= - // total_energy += all_band_energy(ik, vxc_k_mo, p2d, wg); + // total_energy += all_band_energy(ik, vxc_tot_k_mo, p2d, wg); // ======test======= } // ======test======= @@ -303,6 +332,39 @@ namespace ModuleIO for (int is = 0;is < nspin0;++is) { delete vxcs_op_ao[is]; - } + } + // write the orbital energy for xc and exx in LibRPA format + auto write_orb_energy = [&kv, &nspin0, &nbands](const std::vector>& e_orb, + const std::string& label, const bool app = false) + { + assert(e_orb.size() == kv.nks); + const int nk = kv.nks / nspin0; + std::ofstream ofs; + ofs.open(GlobalV::global_out_dir + "vxc_" + (label == "" ? "out" : label + "_out"), + app ? std::ios::app : std::ios::out); + ofs << nk << "\n" << nspin0 << "\n" << nbands << "\n"; + ofs << std::scientific << std::setprecision(16); + for (int ik = 0;ik < nk;++ik) + { + for (int is = 0;is < nspin0;++is) + { + for (auto e : e_orb[is * nk + ik]) + { // Hartree and eV + ofs << e / 2. << "\t" << e * ModuleBase::Ry_to_eV << "\n"; + } + } + } + }; + if (GlobalV::MY_RANK == 0) + { + write_orb_energy(e_orb_tot, ""); +#ifdef __EXX + if (GlobalC::exx_info.info_global.cal_exx) + { + write_orb_energy(e_orb_locxc, "local"); + write_orb_energy(e_orb_exx, "exx"); + } +#endif + } } } diff --git a/tests/integrate/207_NO_KP_OXC/INPUT b/tests/integrate/207_NO_KP_OXC/INPUT index a97c26dd21..065e75cac6 100644 --- a/tests/integrate/207_NO_KP_OXC/INPUT +++ b/tests/integrate/207_NO_KP_OXC/INPUT @@ -28,3 +28,13 @@ mixing_beta 0.7 out_mat_xc 1 ks_solver scalapack_gvx +dft_functional hse +exx_real_number 1 +exx_pca_threshold 1E-4 +exx_c_threshold 1E-4 +exx_v_threshold 1 +exx_dm_threshold 1E-4 +exx_cauchy_threshold 1E-6 +exx_ccp_rmesh_times 1 +exx_separate_loop 1 +exx_hybrid_step 3 \ No newline at end of file diff --git a/tests/integrate/207_NO_KP_OXC/KPT b/tests/integrate/207_NO_KP_OXC/KPT index f5f7f4ec34..e769af7638 100644 --- a/tests/integrate/207_NO_KP_OXC/KPT +++ b/tests/integrate/207_NO_KP_OXC/KPT @@ -1,4 +1,4 @@ K_POINTS 0 Gamma -2 2 2 0 0 0 +2 1 1 0 0 0 diff --git a/tests/integrate/207_NO_KP_OXC/k-1-Vxc.ref b/tests/integrate/207_NO_KP_OXC/k-1-Vxc.ref index 6651d03e42..3b3b0dfe51 100644 --- a/tests/integrate/207_NO_KP_OXC/k-1-Vxc.ref +++ b/tests/integrate/207_NO_KP_OXC/k-1-Vxc.ref @@ -1,6 +1,6 @@ -6 (-0.805858,3.20475e-31) (-1.38778e-16,4.41729e-16) (-9.28077e-17,2.16937e-30) (1.03216e-16,1.97215e-31) (7.21645e-16,-6.64683e-16) (-1.04083e-17,-5.91646e-31) - (-0.745176,-2.69938e-30) (-1.38778e-16,3.9443e-31) (2.77556e-17,-2.36658e-30) (-0.100899,-4.89558e-15) (6.93889e-17,7.09975e-30) - (-0.827683,1.12166e-30) (-1.8735e-16,-2.21867e-30) (1.11022e-16,1.97215e-30) (4.996e-16,-1.19047e-16) - (-0.827683,3.57453e-31) (3.1225e-17,-1.72563e-30) (-3.88578e-16,1.19825e-16) - (-0.745849,3.20475e-31) (-5.38225e-17,7.39557e-31) - (-0.739459,3.69779e-31) +6 (-1.03906440e+00,-3.28984700e-16) (-4.79948026e-11,8.29474856e-16) (2.30389492e-13,1.00796921e-14) (7.41729594e-13,2.43532560e-17) (6.43712306e-11,5.70270843e-16) (3.52504822e-11,7.33979365e-13) + (-9.63737147e-01,1.26557062e-15) (6.37714326e-11,2.71427499e-12) (-8.30504138e-11,1.55361767e-14) (1.23037027e-01,5.96012135e-15) (2.41324322e-13,-2.41092682e-16) + (-1.06369885e+00,2.37908206e-16) (3.58341828e-14,-1.78915043e-15) (-1.26857316e-10,5.43607520e-12) (-1.09731307e-10,3.55419557e-12) + (-1.06369885e+00,2.36736345e-16) (1.56426261e-11,3.11153758e-14) (3.68115260e-11,-3.06158094e-13) + (-6.54572201e-01,1.11425211e-16) (5.19990845e-14,-3.84935899e-17) + (-6.50796154e-01,-5.40483680e-18) diff --git a/tests/integrate/207_NO_KP_OXC/result.ref b/tests/integrate/207_NO_KP_OXC/result.ref index 7d89999bce..505c1187ab 100644 --- a/tests/integrate/207_NO_KP_OXC/result.ref +++ b/tests/integrate/207_NO_KP_OXC/result.ref @@ -1,4 +1,4 @@ -etotref -211.4466153062836 -etotperatomref -105.7233076531 +etotref -212.5511698630333 +etotperatomref -106.2755849315 CompareXC_pass 0 -totaltimeref 0.62879 +totaltimeref 19.88 \ No newline at end of file diff --git a/tests/integrate/207_NO_KP_OXC/vxc_out.ref b/tests/integrate/207_NO_KP_OXC/vxc_out.ref new file mode 100644 index 0000000000..4a4614ef7d --- /dev/null +++ b/tests/integrate/207_NO_KP_OXC/vxc_out.ref @@ -0,0 +1,15 @@ +2 +1 +6 +-5.1240042976965061e-01 -1.3943131005032152e+01 +-5.4513974737959203e-01 -1.4834013541286041e+01 +-5.4513974737926330e-01 -1.4834013541277097e+01 +-5.2074993904080846e-01 -1.4170332808215299e+01 +-3.3346675058294384e-01 -9.0740958029457151e+00 +-3.3864852416264057e-01 -9.2150990958051811e+00 +-5.1953219929656025e-01 -1.4137196409809622e+01 +-4.8186857360940338e-01 -1.3112316576440625e+01 +-5.3184942295127013e-01 -1.4472365260298501e+01 +-5.3184942295091764e-01 -1.4472365260288909e+01 +-3.2728610048105333e-01 -8.9059116854857336e+00 +-3.2539807714783464e-01 -8.8545359349082791e+00 diff --git a/tests/integrate/307_NO_GO_OXC/vxc_out.ref b/tests/integrate/307_NO_GO_OXC/vxc_out.ref new file mode 100644 index 0000000000..ed845fbec7 --- /dev/null +++ b/tests/integrate/307_NO_GO_OXC/vxc_out.ref @@ -0,0 +1,9 @@ +1 +1 +6 +-3.4234536094979806e-01 -9.3156951855678916e+00 +-3.5746496058679345e-01 -9.7271205986516289e+00 +-2.9661198231693892e-01 -8.0712261091712225e+00 +-2.9661198231693947e-01 -8.0712261091712385e+00 +-2.9661198231693903e-01 -8.0712261091712261e+00 +-3.0978260393545448e-01 -8.4296171095988104e+00 diff --git a/tests/integrate/tools/catch_properties.sh b/tests/integrate/tools/catch_properties.sh index e6936c6b03..a348f822e8 100755 --- a/tests/integrate/tools/catch_properties.sh +++ b/tests/integrate/tools/catch_properties.sh @@ -215,13 +215,16 @@ fi if ! test -z "$has_xc" && [ $has_xc == 1 ]; then if ! test -z "$gamma_only" && [ $gamma_only == 1 ]; then - xcref=k-0-Vxc.ref - xccal=OUT.autotest/k-0-Vxc - else - xcref=k-1-Vxc.ref - xccal=OUT.autotest/k-1-Vxc - fi - python3 ../tools/CompareFile.py $xcref $xccal 4 + xcref=k-0-Vxc.ref + xccal=OUT.autotest/k-0-Vxc + else + xcref=k-1-Vxc.ref + xccal=OUT.autotest/k-1-Vxc + fi + oeref=vxc_out.ref + oecal=OUT.autotest/vxc_out + python3 ../tools/CompareFile.py $xcref $xccal 4 + python3 ../tools/CompareFile.py $oeref $oecal 6 echo "CompareXC_pass $?" >>$1 fi From a2a6165fca50a1ae0daa8611321576f4d04a7d96 Mon Sep 17 00:00:00 2001 From: Chun Cai Date: Sun, 5 May 2024 09:43:01 +0800 Subject: [PATCH 16/18] CI: update runner config (#4088) * CI: update runner config New runners with ARM arch is added into deepmodeling organization. We need to manually set the tests runs on x64 arch. * let image builds on github-hosted runner * Revert #4065 --- .github/workflows/coverage.yml | 4 ++-- .github/workflows/devcontainer.yml | 2 +- .github/workflows/dynamic.yml | 2 +- .github/workflows/image.yml | 2 +- .github/workflows/test.yml | 2 +- Dockerfile.intel | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index c552a0b9e4..678012eef3 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -8,8 +8,8 @@ on: jobs: test-coverage: name: Generate Coverage Report - runs-on: self-hosted - container: ghcr.io/deepmodeling/abacus-development-kit:gnu + runs-on: X64 + container: ghcr.io/deepmodeling/abacus-gnu steps: - name: Checkout uses: actions/checkout@v4 diff --git a/.github/workflows/devcontainer.yml b/.github/workflows/devcontainer.yml index 597283930b..723b008214 100644 --- a/.github/workflows/devcontainer.yml +++ b/.github/workflows/devcontainer.yml @@ -7,7 +7,7 @@ on: jobs: build_container_and_push: - runs-on: self-hosted + runs-on: ubuntu-latest if: github.repository_owner == 'deepmodeling' strategy: matrix: diff --git a/.github/workflows/dynamic.yml b/.github/workflows/dynamic.yml index b0d271c4f9..85cb13482e 100644 --- a/.github/workflows/dynamic.yml +++ b/.github/workflows/dynamic.yml @@ -8,7 +8,7 @@ on: jobs: test: name: Dynamic analysis - runs-on: self-hosted + runs-on: X64 if: github.repository_owner == 'deepmodeling' container: ghcr.io/deepmodeling/abacus-gnu steps: diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index d4836ddd06..9203a72f6c 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -7,7 +7,7 @@ on: - 'v*' jobs: build_container_and_push: - runs-on: self-hosted + runs-on: ubuntu-latest if: github.repository_owner == 'deepmodeling' steps: - name: Checkout diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d1a3e1ef0d..dff5f21aa8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,7 @@ concurrency: jobs: test: name: Test - runs-on: self-hosted + runs-on: X64 if: github.repository_owner == 'deepmodeling' container: image: ghcr.io/deepmodeling/abacus-gnu diff --git a/Dockerfile.intel b/Dockerfile.intel index 269713a16c..9628cc5b97 100644 --- a/Dockerfile.intel +++ b/Dockerfile.intel @@ -16,7 +16,7 @@ RUN apt-get update && \ intel-oneapi-compiler-dpcpp-cpp \ intel-oneapi-compiler-fortran \ intel-oneapi-mkl-devel \ - intel-oneapi-mpi-devel="2021.11.*" \ + intel-oneapi-mpi-devel \ intel-oneapi-vtune ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/latest \ LIBRARY_PATH=/opt/intel/oneapi/tbb/latest/env/../lib/intel64/gcc4.8:/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/mkl/latest/lib/:/opt/intel/oneapi/ippcp/latest/lib/:/opt/intel/oneapi/ipp/latest/lib:/opt/intel/oneapi/dpl/latest/lib:/opt/intel/oneapi/dnnl/latest/lib:/opt/intel/oneapi/dal/latest/lib:/opt/intel/oneapi/compiler/latest/lib:/opt/intel/oneapi/ccl/latest/lib/ \ From 3f2e20a7c8f34a3db3e16410aaf93054579f5574 Mon Sep 17 00:00:00 2001 From: Chun Cai Date: Sun, 5 May 2024 16:13:17 +0800 Subject: [PATCH 17/18] CI: build images with tags on self-hosted runner (#4089) * CI: build images with tags on self-hosted runner Remove the outdated runner config. Now the abacus-* series images are built with a tag on release. Use self-hosted runner to build them to avoid AVX512 availability problem. * update outdated action * add comments --- .github/workflows/devcontainer.yml | 23 +++++++++--- .github/workflows/image.yml | 50 -------------------------- .github/workflows/performance.yml | 56 ++---------------------------- Dockerfile | 41 ---------------------- Dockerfile.gnu | 18 ++++++++-- 5 files changed, 35 insertions(+), 153 deletions(-) delete mode 100644 .github/workflows/image.yml delete mode 100644 Dockerfile diff --git a/.github/workflows/devcontainer.yml b/.github/workflows/devcontainer.yml index 723b008214..6d2113efb6 100644 --- a/.github/workflows/devcontainer.yml +++ b/.github/workflows/devcontainer.yml @@ -4,10 +4,13 @@ on: push: branches: - develop + tags: + - 'v*' + workflow_dispatch: jobs: build_container_and_push: - runs-on: ubuntu-latest + runs-on: X64 if: github.repository_owner == 'deepmodeling' strategy: matrix: @@ -16,6 +19,17 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: | + ghcr.io/deepmodeling/abacus-${{ matrix.dockerfile }} + registry.dp.tech/deepmodeling/abacus-${{ matrix.dockerfile }} + tags: | + type=semver,pattern={{version}},enable=${{ github.ref_type == 'tag' }} + type=raw,value=latest + - name: Setup Docker Buildx uses: docker/setup-buildx-action@v3 @@ -36,10 +50,9 @@ jobs: - name: Build and Push Container uses: docker/build-push-action@v5 with: - tags: | - ghcr.io/deepmodeling/abacus-${{ matrix.dockerfile }}:latest - registry.dp.tech/deepmodeling/abacus-${{ matrix.dockerfile }}:latest + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} file: Dockerfile.${{ matrix.dockerfile }} + push: true cache-from: type=registry,ref=ghcr.io/deepmodeling/abacus-${{ matrix.dockerfile }}:latest cache-to: type=inline - push: true diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml deleted file mode 100644 index 9203a72f6c..0000000000 --- a/.github/workflows/image.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: Build Image - -on: - workflow_dispatch: - push: - tags: - - 'v*' -jobs: - build_container_and_push: - runs-on: ubuntu-latest - if: github.repository_owner == 'deepmodeling' - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Docker meta - id: meta - uses: docker/metadata-action@v5 - with: - images: | - ghcr.io/deepmodeling/abacus - registry.dp.tech/deepmodeling/abacus - tags: | - type=semver,pattern={{version}} - type=raw,value=latest,enable=${{ github.event_name == 'workflow_dispatch' }} - - - name: Setup Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Login to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Login to Aliyun Registry - uses: docker/login-action@v3 - with: - registry: registry.dp.tech - username: ${{ secrets.DP_HARBOR_USERNAME }} - password: ${{ secrets.DP_HARBOR_PASSWORD }} - - - name: Build and Push Container - uses: docker/build-push-action@v5 - with: - tags: ${{ steps.meta.outputs.tags }} - file: Dockerfile - push: true - labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml index 4acfd2991c..589a85e584 100644 --- a/.github/workflows/performance.yml +++ b/.github/workflows/performance.yml @@ -4,34 +4,9 @@ on: workflow_dispatch: jobs: - start-runner: - name: Start self-hosted EC2 runner - runs-on: ubuntu-latest - outputs: - label: ${{ steps.start-ec2-runner.outputs.label }} - ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: us-east-2 - - name: Start EC2 runner - id: start-ec2-runner - uses: machulav/ec2-github-runner@v2 - with: - mode: start - github-token: ${{ secrets.PAT }} - ec2-image-id: ami-04cd9fec4a7a39019 - ec2-instance-type: c5.9xlarge - subnet-id: subnet-72d3e53e - security-group-id: sg-06b0c93122c08aeab - test: name: Performance test - needs: start-runner - runs-on: ${{ needs.start-runner.outputs.label }} + runs-on: X64 strategy: matrix: tag: ["gnu", "intel"] @@ -43,39 +18,12 @@ jobs: - name: Install Requirements run: | apt install -y time - - name: Build - run: | - cmake -B build -DENABLE_LIBXC=ON - cmake --build build -j`nproc` - cmake --install build - name: Test run: | - test -e /opt/intel/oneapi/setvars.sh && . /opt/intel/oneapi/setvars.sh + . /opt/intel/oneapi/setvars.sh || : cd tests/performance/ bash run.sh - name: Show Result if: always() run: | cat tests/performance/sumall.dat - - stop-runner: - name: Stop self-hosted EC2 runner - needs: - - start-runner # required to get output from the start-runner job - - test # required to wait when the main job is done - runs-on: ubuntu-latest - if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: us-east-2 - - name: Stop EC2 runner - uses: machulav/ec2-github-runner@v2 - with: - mode: stop - github-token: ${{ secrets.PAT }} - label: ${{ needs.start-runner.outputs.label }} - ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 68c47048ac..0000000000 --- a/Dockerfile +++ /dev/null @@ -1,41 +0,0 @@ -# To build this Dockerfile, run `docker build -t abacus - < Dockerfile`. -# Build without cloning the repo by `docker build https://github.com/deepmodeling/abacus-develop.git#develop`, -# and optionally choose the Dockerfile in use by appending e.g. `-f Dockerfile.gnu`. -# Alternatively, pull the image with `docker pull ghcr.io/deepmodeling/abacus:latest`. -# Also available at `docker pull registry.dp.tech/deepmodeling/abacus:latest`. - -# Docker images are aimed for evaluating ABACUS. -# For production use, please compile ABACUS from source code and run in bare-metal for a better performace. - -FROM ubuntu:22.04 -RUN apt update && apt install -y --no-install-recommends \ - libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev libxc-dev \ - g++ make cmake bc time sudo vim git -# If you wish to use the LLVM compiler, replace 'g++' above with 'clang libomp-dev'. - -ENV GIT_SSL_NO_VERIFY=true TERM=xterm-256color \ - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 OMPI_MCA_btl_vader_single_copy_mechanism=none - -# This will fetch the latest commit info, and store in docker building cache. -# If there are newer commits, docker build will ignore the cache and build latest codes. -ADD https://api.github.com/repos/deepmodeling/abacus-develop/git/refs/heads/develop /dev/null - -RUN git clone https://github.com/deepmodeling/abacus-develop.git --depth 1 && \ - cd abacus-develop && \ - cmake -B build && \ - cmake --build build -j`nproc` && \ - cmake --install build && \ - rm -rf build && \ - cd .. - #&& rm -rf abacus-develop -# If you have trouble cloning repo, replace "github.com" with "gitee.com". -CMD mpirun --use-hwthread-cpus abacus - -# To run ABACUS built by this image with all available threads, execute `docker run -v : -w abacus:latest`. -# Replace '' with the path to all files(including pseudopotential files), '' with a path to working directory, and '' with the path to input folder(containing 'INPUT', 'STRU', etc.). -# e.g. after cloning the repo to `$HOME` and pulling image, execute `docker run -v ~/abacus-develop/tests/integrate:/workspace -w /workspace/101_PW_15_f_pseudopots abacus:latest`. -# To run ABACUS with a given MPI process number, execute `docker run -v : -w -it --entrypoint mpirun abacus:latest -np abacus`. -# Note: It would be better using all available CPUs. Docker uses CFS to share the CPU resources, which will result in bad CPU affinity. - -# To use this image as developing environment, execute `docker run -it --entrypoint /bin/bash abacus`. -# Please refer to https://docs.docker.com/engine/reference/commandline/run/ for more details. diff --git a/Dockerfile.gnu b/Dockerfile.gnu index c8316479c9..23b44f1e4c 100644 --- a/Dockerfile.gnu +++ b/Dockerfile.gnu @@ -1,11 +1,22 @@ +# To build this Dockerfile, run `docker build -t abacus - < Dockerfile.gnu`. +# Build without cloning the repo by `docker build https://github.com/deepmodeling/abacus-develop.git#develop -f Dockerfile.intel`, +# Alternatively, pull the image with `docker pull ghcr.io/deepmodeling/abacus-intel:latest`. +# Also available at `docker pull registry.dp.tech/deepmodeling/abacus-intel:latest`. +# Available image names: abacus-gnu, abacus-intel, abacus-cuda + +# Docker images are aimed for evaluating ABACUS. +# For production use, please compile ABACUS from source code and run in bare-metal for a better performace. + FROM ubuntu:22.04 RUN apt update && apt install -y --no-install-recommends \ libopenblas-openmp-dev liblapack-dev libscalapack-mpi-dev libelpa-dev libfftw3-dev libcereal-dev \ libxc-dev libgtest-dev libgmock-dev libbenchmark-dev python3-numpy \ bc cmake git g++ make bc time sudo unzip vim wget gfortran + # If you wish to use the LLVM compiler, replace 'g++' above with 'clang libomp-dev'. ENV GIT_SSL_NO_VERIFY=true TERM=xterm-256color \ OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 OMPI_MCA_btl_vader_single_copy_mechanism=none + # The above environment variables are for using OpenMPI in Docker. RUN git clone https://github.com/llohse/libnpy.git && \ cp libnpy/include/npy.hpp /usr/local/include && \ @@ -13,17 +24,18 @@ RUN git clone https://github.com/llohse/libnpy.git && \ RUN wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.0.0%2Bcpu.zip \ --no-check-certificate --quiet -O libtorch.zip && \ - unzip -q libtorch.zip -d /opt && rm libtorch.zip + unzip -q libtorch.zip -d /opt && rm libtorch.zip ENV CMAKE_PREFIX_PATH=/opt/libtorch/share/cmake ADD https://api.github.com/repos/deepmodeling/abacus-develop/git/refs/heads/develop /dev/null + # This will fetch the latest commit info, and store in docker building cache. + # If there are newer commits, docker build will ignore the cache and build latest codes. RUN git clone https://github.com/deepmodeling/abacus-develop.git --depth 1 && \ cd abacus-develop && \ cmake -B build -DENABLE_DEEPKS=ON -DENABLE_LIBXC=ON -DENABLE_LIBRI=ON -DENABLE_RAPIDJSON=ON && \ cmake --build build -j`nproc` && \ cmake --install build && \ - rm -rf build && \ - cd .. + rm -rf build #&& rm -rf abacus-develop From 96e8c59597f4e397b33c6be3c9d0ac8b3f285b44 Mon Sep 17 00:00:00 2001 From: Taoni Bao Date: Sun, 5 May 2024 16:14:34 +0800 Subject: [PATCH 18/18] Add a warning quit when parameters do not match in `read_wfc_nao.cpp` (#4087) * Add a warning quit when param do not match in reading wfc nao * Do nearly nothing, just to restart integration test * Added Doxygen-style comments to the CTOT2q function and made some other relevant changes * Added const qualifiers where applicable and initialized variables at declaration --------- Co-authored-by: Mohan Chen --- source/module_io/read_wfc_nao.cpp | 633 +++++++++++++++++------------- 1 file changed, 351 insertions(+), 282 deletions(-) diff --git a/source/module_io/read_wfc_nao.cpp b/source/module_io/read_wfc_nao.cpp index c55e14044d..4e940874d4 100644 --- a/source/module_io/read_wfc_nao.cpp +++ b/source/module_io/read_wfc_nao.cpp @@ -1,85 +1,126 @@ #include "read_wfc_nao.h" + #include "module_base/parallel_common.h" #include "module_base/timer.h" -inline int CTOT2q( - int myid, - int naroc[2], - int nb, - int dim0, - int dim1, - int iprow, - int ipcol, - const int nbands, - double* work, - double** CTOT) +/** + * @brief Extracts a submatrix from a global orbital coefficient matrix and stores it in a linear array. + * This function is designed for real-valued matrices. + * + * @param myid The rank of the current MPI process. + * @param naroc Array with two elements: number of rows and columns this process is responsible for. + * @param nb Block size used in the global index calculation. + * @param dim0 Global number of rows in the matrix. + * @param dim1 Global number of columns in the matrix. + * @param iprow The row index in the process grid. + * @param ipcol The column index in the process grid. + * @param nbands Total number of bands (relevant matrix columns) involved in the computation. + * @param work Output array where the submatrix will be stored linearly. + * @param CTOT Global matrix from which the submatrix is extracted. + * @return int Always returns 0 as a success indicator. + */ +inline int CTOT2q(const int myid, + const int naroc[2], + const int nb, + const int dim0, + const int dim1, + const int iprow, + const int ipcol, + const int nbands, + double* work, + double** const CTOT) { - for(int j=0; j=nbands) continue; - for(int i=0; i= nbands) + { + continue; + } + for (int i = 0; i < naroc[0]; ++i) { - int igrow=Local_Orbital_wfc::globalIndex(i, nb, dim0, iprow); - //GlobalV::ofs_running << "i,j,igcol,igrow" << i<<" "<* work, - std::complex** CTOT) +/** + * @brief Extracts a submatrix from a global orbital coefficient matrix and stores it in a linear array. + * This function is designed for complex-valued matrices. + * + * @param myid The rank of the current MPI process. + * @param naroc Array with two elements: number of rows and columns this process is responsible for. + * @param nb Block size used in the global index calculation. + * @param dim0 Global number of rows in the matrix. + * @param dim1 Global number of columns in the matrix. + * @param iprow The row index in the process grid. + * @param ipcol The column index in the process grid. + * @param nbands Total number of bands (relevant matrix columns) involved in the computation. + * @param work Output array where the submatrix will be stored linearly. + * @param CTOT Global matrix from which the submatrix is extracted. + * @return int Always returns 0 as a success indicator. + */ +inline int CTOT2q_c(const int myid, + const int naroc[2], + const int nb, + const int dim0, + const int dim1, + const int iprow, + const int ipcol, + const int nbands, + std::complex* work, + std::complex** const CTOT) { - for(int j=0; j=nbands) continue; - for(int i=0; i= nbands) { - int igrow=Local_Orbital_wfc::globalIndex(i, nb, dim0, iprow); - //ofs_running << "i,j,igcol,igrow" << i<<" "<** ctot, - const int& ik, - const int& nb2d, - const int& nbands_g, - const int& nlocal_g, - const std::string& global_readin_dir, - const ModuleBase::Vector3 kvec_c, - const Parallel_Orbitals* ParaV, - psi::Psi>* psi, - elecstate::ElecState* pelec) +int ModuleIO::read_wfc_nao_complex(std::complex** ctot, + const int& ik, + const int& nb2d, + const int& nbands_g, + const int& nlocal_g, + const std::string& global_readin_dir, + const ModuleBase::Vector3 kvec_c, + const Parallel_Orbitals* ParaV, + psi::Psi>* psi, + elecstate::ElecState* pelec) { - ModuleBase::TITLE("ModuleIO","read_wfc_nao_complex"); - ModuleBase::timer::tick("ModuleIO","read_wfc_nao_complex"); + ModuleBase::TITLE("ModuleIO", "read_wfc_nao_complex"); + ModuleBase::timer::tick("ModuleIO", "read_wfc_nao_complex"); std::stringstream ss; - // read wave functions - ss << global_readin_dir << "LOWF_K_" << ik+1 <<".txt"; -// std::cout << " name is = " << ss.str() << std::endl; + // read wave functions + ss << global_readin_dir << "LOWF_K_" << ik + 1 << ".txt"; + // std::cout << " name is = " << ss.str() << std::endl; std::ifstream ifs; - int error = 0; - if (GlobalV::DRANK==0) + if (GlobalV::DRANK == 0) { ifs.open(ss.str().c_str()); if (!ifs) @@ -93,39 +134,39 @@ int ModuleIO::read_wfc_nao_complex( Parallel_Common::bcast_int(error); #endif - if (error==1) return 1; + if (error == 1) + { + return 1; + } // otherwise, find the file. - if (GlobalV::MY_RANK==0) + if (GlobalV::MY_RANK == 0) { - int ikr; - double kx,ky,kz; - int nbands, nlocal; - ModuleBase::GlobalFunc::READ_VALUE(ifs, ikr); - ifs >> kx >> ky >> kz; + int ikr = 0; + double kx = 0.0, ky = 0.0, kz = 0.0; + int nbands = 0, nlocal = 0; + ModuleBase::GlobalFunc::READ_VALUE(ifs, ikr); + ifs >> kx >> ky >> kz; ModuleBase::GlobalFunc::READ_VALUE(ifs, nbands); ModuleBase::GlobalFunc::READ_VALUE(ifs, nlocal); - if(ikr!=ik+1) - { - GlobalV::ofs_warning << " ikr=" << ikr << " ik=" << ik << std::endl; - GlobalV::ofs_warning << " k index is not correct" << std::endl; - error = 4; - } - else if ( - std::abs(kx-kvec_c.x)>1.0e-5 || - std::abs(ky-kvec_c.y)>1.0e-5 || - std::abs(kz-kvec_c.z)>1.0e-5 ) - { - GlobalV::ofs_warning << " k std::vector is not correct" << std::endl; - GlobalV::ofs_warning << " Read in kx=" << kx << " ky = " << ky << " kz = " << kz << std::endl; - GlobalV::ofs_warning << " In fact, kx=" << kvec_c.x - << " ky=" << kvec_c.y - << " kz=" << kvec_c.z << std::endl; - error = 4; - } - else if (nbands!=nbands_g) + if (ikr != ik + 1) + { + GlobalV::ofs_warning << " ikr=" << ikr << " ik=" << ik << std::endl; + GlobalV::ofs_warning << " k index is not correct" << std::endl; + error = 4; + } + else if (std::abs(kx - kvec_c.x) > 1.0e-5 || std::abs(ky - kvec_c.y) > 1.0e-5 + || std::abs(kz - kvec_c.z) > 1.0e-5) + { + GlobalV::ofs_warning << " k std::vector is not correct" << std::endl; + GlobalV::ofs_warning << " Read in kx=" << kx << " ky=" << ky << " kz=" << kz << std::endl; + GlobalV::ofs_warning << " In fact, kx=" << kvec_c.x << " ky=" << kvec_c.y << " kz=" << kvec_c.z + << std::endl; + error = 4; + } + else if (nbands != nbands_g) { GlobalV::ofs_warning << " read in nbands=" << nbands; GlobalV::ofs_warning << " NBANDS=" << nbands_g << std::endl; @@ -139,29 +180,29 @@ int ModuleIO::read_wfc_nao_complex( } ctot = new std::complex*[nbands_g]; - for (int i=0; i[nlocal_g]; } - for (int i=0; iekb(ik, ib)); - ModuleBase::GlobalFunc::READ_VALUE(ifs, pelec->wg(ik,ib)); - assert( i==ib ); - double a, b; - for (int j=0; jekb(ik, ib)); + ModuleBase::GlobalFunc::READ_VALUE(ifs, pelec->wg(ik, ib)); + assert(i == ib); + double a = 0.0, b = 0.0; + for (int j = 0; j < nlocal_g; ++j) { ifs >> a >> b; - ctot[i][j]=std::complex(a,b); - //std::cout << ctot[i][j] << " " << std::endl; + ctot[i][j] = std::complex(a, b); + // std::cout << ctot[i][j] << " " << std::endl; } } } @@ -170,78 +211,86 @@ int ModuleIO::read_wfc_nao_complex( Parallel_Common::bcast_int(error); Parallel_Common::bcast_double(&pelec->wg.c[ik * pelec->wg.nc], pelec->wg.nc); #endif - if(error==2) return 2; - if(error==3) return 3; - if(error==4) return 4; - - ModuleIO::distri_wfc_nao_complex(ctot, ik, nb2d, nbands_g, ParaV, psi); - - // mohan add 2012-02-15, - // still have bugs, but can solve it later. - // distribute the wave functions again. - - if (GlobalV::DRANK==0) + + if (error == 2) + { + return 2; + } + if (error == 3) + { + return 3; + } + if (error == 4) + { + return 4; + } + + ModuleIO::distri_wfc_nao_complex(ctot, ik, nb2d, nbands_g, ParaV, psi); + + // mohan add 2012-02-15, + // still have bugs, but can solve it later. + // distribute the wave functions again. + + if (GlobalV::DRANK == 0) { // delte the ctot - for (int i=0; i* psid, - elecstate::ElecState* pelec) +int ModuleIO::read_wfc_nao(double** ctot, + const int& is, + const bool& gamma_only_local, + const int& nb2d, + const int& nbands_g, + const int& nlocal_g, + const std::string& global_readin_dir, + const Parallel_Orbitals* ParaV, + psi::Psi* psid, + elecstate::ElecState* pelec) { - ModuleBase::TITLE("ModuleIO","read_wfc_nao"); + ModuleBase::TITLE("ModuleIO", "read_wfc_nao"); ModuleBase::timer::tick("ModuleIO", "read_wfc_nao"); - + std::stringstream ss; - if(GlobalV::GAMMA_ONLY_LOCAL) - { - // read wave functions - ss << global_readin_dir << "LOWF_GAMMA_S" << is+1 <<".txt"; - std::cout << " name is = " << ss.str() << std::endl; - } - else - { - ss << global_readin_dir << "LOWF_K.txt"; - } + if (GlobalV::GAMMA_ONLY_LOCAL) + { + // read wave functions + ss << global_readin_dir << "LOWF_GAMMA_S" << is + 1 << ".txt"; + std::cout << " name is = " << ss.str() << std::endl; + } + else + { + ss << global_readin_dir << "LOWF_K.txt"; + } std::ifstream ifs; int error = 0; - if (GlobalV::DRANK==0) + if (GlobalV::DRANK == 0) { ifs.open(ss.str().c_str()); if (!ifs) @@ -255,217 +304,237 @@ int ModuleIO::read_wfc_nao( Parallel_Common::bcast_int(error); #endif - if (error==1) return 1; + if (error == 1) + { + return 1; + } // otherwise, find the file. - if (GlobalV::MY_RANK==0) + if (GlobalV::MY_RANK == 0) { - int nbands, nlocal; + int nbands = 0, nlocal = 0; ModuleBase::GlobalFunc::READ_VALUE(ifs, nbands); ModuleBase::GlobalFunc::READ_VALUE(ifs, nlocal); - if (nbands!=nbands_g) + if (nbands != nbands_g) { GlobalV::ofs_warning << " read in nbands=" << nbands; GlobalV::ofs_warning << " NBANDS=" << nbands_g << std::endl; error = 2; + ModuleBase::WARNING_QUIT("ModuleIO::read_wfc_nao", + "The nbands read in from file do not match the global parameter NBANDS!"); } else if (nlocal != nlocal_g) { GlobalV::ofs_warning << " read in nlocal=" << nlocal; GlobalV::ofs_warning << " NLOCAL=" << nlocal_g << std::endl; error = 3; + ModuleBase::WARNING_QUIT("ModuleIO::read_wfc_nao", + "The nlocal read in from file do not match the global parameter NLOCAL!"); } ctot = new double*[nbands_g]; - for (int i=0; iekb(is, i)); ModuleBase::GlobalFunc::READ_VALUE(ifs, pelec->wg(is, i)); - assert( (i+1)==ib); - //std::cout << " ib=" << ib << std::endl; - for (int j=0; jekb(is, 0)), nbands_g); - Parallel_Common::bcast_double( &(pelec->wg(is, 0)), nbands_g); + Parallel_Common::bcast_double(&(pelec->ekb(is, 0)), nbands_g); + Parallel_Common::bcast_double(&(pelec->wg(is, 0)), nbands_g); #endif - if(error==2) return 2; - if(error==3) return 3; + if (error == 2) + { + return 2; + } + if (error == 3) + { + return 3; + } + + ModuleIO::distri_wfc_nao(ctot, is, nb2d, nbands_g, nlocal_g, ParaV, psid); - ModuleIO::distri_wfc_nao(ctot, is, nb2d, nbands_g, nlocal_g,ParaV, psid); - - // mohan add 2012-02-15, - // still have bugs, but can solve it later. - // distribute the wave functions again. - // mohan comment out 2021-02-09 + // mohan add 2012-02-15, + // still have bugs, but can solve it later. + // distribute the wave functions again. + // mohan comment out 2021-02-09 - if (GlobalV::MY_RANK==0) + if (GlobalV::MY_RANK == 0) { // delte the ctot - for (int i=0; i* psid) +void ModuleIO::distri_wfc_nao(double** ctot, + const int& is, + const int& nb2d, + const int& nbands_g, + const int& nlocal_g, + const Parallel_Orbitals* ParaV, + psi::Psi* psid) { - ModuleBase::TITLE("ModuleIO","distri_wfc_nao"); + ModuleBase::TITLE("ModuleIO", "distri_wfc_nao"); #ifdef __MPI -//1. alloc work array; set some parameters + // 1. alloc work array; set some parameters - long maxnloc; // maximum number of elements in local matrix + long maxnloc = 0; // maximum number of elements in local matrix MPI_Reduce(&ParaV->nloc_wfc, &maxnloc, 1, MPI_LONG, MPI_MAX, 0, ParaV->comm_2D); - MPI_Bcast(&maxnloc, 1, MPI_LONG, 0, ParaV->comm_2D); - //reduce and bcast could be replaced by allreduce - - int nprocs, myid; + MPI_Bcast(&maxnloc, 1, MPI_LONG, 0, ParaV->comm_2D); + // reduce and bcast could be replaced by allreduce + + int nprocs = 0, myid = 0; MPI_Comm_size(ParaV->comm_2D, &nprocs); MPI_Comm_rank(ParaV->comm_2D, &myid); - double *work=new double[maxnloc]; // work/buffer matrix + double* work = new double[maxnloc]; // work/buffer matrix #ifdef __DEBUG -assert(nb2d > 0); -#endif - int nb = nb2d; - int info; - int naroc[2]; // maximum number of row or column - -//2. copy from ctot to psi - for(int iprow=0; iprowdim0; ++iprow) - { - for(int ipcol=0; ipcoldim1; ++ipcol) - { -//2.1 get and bcast local 2d matrix info - const int coord[2]={iprow, ipcol}; - int src_rank; - MPI_Cart_rank(ParaV->comm_2D, coord, &src_rank); - if(myid==src_rank) - { - naroc[0]=ParaV->nrow; - naroc[1]=ParaV->ncol; - } - info=MPI_Bcast(naroc, 2, MPI_INT, src_rank, ParaV->comm_2D); - -//2.2 copy from ctot to work, then bcast work - info=CTOT2q(myid, naroc, nb, ParaV->dim0, ParaV->dim1, iprow, ipcol, nbands_g, work, ctot); - info=MPI_Bcast(work, maxnloc, MPI_DOUBLE, 0, ParaV->comm_2D); - //GlobalV::ofs_running << "iprow, ipcow : " << iprow << ipcol << std::endl; - //for (int i=0; inloc_wfc, work, inc, &(psid[0](is, 0, 0)), inc); - } - }//loop ipcol - }//loop iprow + assert(nb2d > 0); +#endif + const int nb = nb2d; + int info = 0; + int naroc[2]; // maximum number of row or column - delete[] work; -#else - for (int i=0; idim0; ++iprow) + { + for (int ipcol = 0; ipcol < ParaV->dim1; ++ipcol) { - for (int j=0; jcomm_2D, coord, &src_rank); + if (myid == src_rank) { - psid[0](is, i, j) = ctot[i][j]; + naroc[0] = ParaV->nrow; + naroc[1] = ParaV->ncol; } + info = MPI_Bcast(naroc, 2, MPI_INT, src_rank, ParaV->comm_2D); + + // 2.2 copy from nctot to work, then bcast work + info = CTOT2q(myid, naroc, nb, ParaV->dim0, ParaV->dim1, iprow, ipcol, nbands_g, work, ctot); + info = MPI_Bcast(work, maxnloc, MPI_DOUBLE, 0, ParaV->comm_2D); + // GlobalV::ofs_running << "iprow, ipcow : " << iprow << ipcol << std::endl; + // for (int i=0; inloc_wfc, work, inc, &(psid[0](is, 0, 0)), inc); + } + } // loop ipcol + } // loop iprow + + delete[] work; +#else + for (int i = 0; i < nbands_g; i++) + { + for (int j = 0; j < nlocal_g; j++) + { + psid[0](is, i, j) = ctot[i][j]; } + } #endif return; } -void ModuleIO::distri_wfc_nao_complex(std::complex** ctot, const int& ik, const int& nb2d, - const int& nbands_g, const Parallel_Orbitals* ParaV, psi::Psi>* psi) +void ModuleIO::distri_wfc_nao_complex(std::complex** ctot, + const int& ik, + const int& nb2d, + const int& nbands_g, + const Parallel_Orbitals* ParaV, + psi::Psi>* psi) { - ModuleBase::TITLE("ModuleIO","distri_wfc_nao_complex"); + ModuleBase::TITLE("ModuleIO", "distri_wfc_nao_complex"); #ifdef __MPI -//1. alloc work array; set some parameters + // 1. alloc work array; set some parameters + + long maxnloc = 0; // maximum number of elements in local matrix + MPI_Reduce(&ParaV->nloc_wfc, &maxnloc, 1, MPI_LONG, MPI_MAX, 0, ParaV->comm_2D); + MPI_Bcast(&maxnloc, 1, MPI_LONG, 0, ParaV->comm_2D); + // reduce and bcast could be replaced by allreduce - long maxnloc; // maximum number of elements in local matrix - MPI_Reduce(&ParaV->nloc_wfc, &maxnloc, 1, MPI_LONG, MPI_MAX, 0, ParaV->comm_2D); - MPI_Bcast(&maxnloc, 1, MPI_LONG, 0, ParaV->comm_2D); - //reduce and bcast could be replaced by allreduce - - int nprocs, myid; + int nprocs = 0, myid = 0; MPI_Comm_size(ParaV->comm_2D, &nprocs); MPI_Comm_rank(ParaV->comm_2D, &myid); - std::complex *work=new std::complex[maxnloc]; // work/buffer matrix + std::complex* work = new std::complex[maxnloc]; // work/buffer matrix #ifdef __DEBUG -assert(nb2d > 0); -#endif - int nb = nb2d; - int info; - int naroc[2]; // maximum number of row or column - -//2. copy from ctot to psi - for(int iprow=0; iprowdim0; ++iprow) - { - for(int ipcol=0; ipcoldim1; ++ipcol) - { -//2.1 get and bcast local 2d matrix info - const int coord[2]={iprow, ipcol}; - int src_rank; - MPI_Cart_rank(ParaV->comm_2D, coord, &src_rank); - if(myid==src_rank) - { - naroc[0]=ParaV->nrow; - naroc[1]=ParaV->ncol_bands; - } - info=MPI_Bcast(naroc, 2, MPI_INT, src_rank, ParaV->comm_2D); - -//2.2 copy from ctot to work, then bcast work - info=CTOT2q_c(myid, naroc, nb, ParaV->dim0, ParaV->dim1, iprow, ipcol, nbands_g, work, ctot); - info=MPI_Bcast(work, maxnloc, MPI_DOUBLE_COMPLEX, 0, ParaV->comm_2D); - //ofs_running << "iprow, ipcow : " << iprow << ipcol << std::endl; - //for (int i=0; i 0); +#endif + const int nb = nb2d; + int info = 0; + int naroc[2]; // maximum number of row or column + + // 2. copy from ctot to psi + for (int iprow = 0; iprow < ParaV->dim0; ++iprow) + { + for (int ipcol = 0; ipcol < ParaV->dim1; ++ipcol) + { + // 2.1 get and bcast local 2d matrix info + const int coord[2] = {iprow, ipcol}; + int src_rank = 0; + MPI_Cart_rank(ParaV->comm_2D, coord, &src_rank); + if (myid == src_rank) + { + naroc[0] = ParaV->nrow; + naroc[1] = ParaV->ncol_bands; + } + info = MPI_Bcast(naroc, 2, MPI_INT, src_rank, ParaV->comm_2D); + + // 2.2 copy from ctot to work, then bcast work + info = CTOT2q_c(myid, naroc, nb, ParaV->dim0, ParaV->dim1, iprow, ipcol, nbands_g, work, ctot); + info = MPI_Bcast(work, maxnloc, MPI_DOUBLE_COMPLEX, 0, ParaV->comm_2D); + // ofs_running << "iprow, ipcow : " << iprow << ipcol << std::endl; + // for (int i=0; inloc_wfc, work, inc, &(psi[0](ik, 0, 0)), inc); - } - }//loop ipcol - }//loop iprow - - delete[] work; + if (myid == src_rank) + { + BlasConnector::copy(ParaV->nloc_wfc, work, inc, &(psi[0](ik, 0, 0)), inc); + } + } // loop ipcol + } // loop iprow + + delete[] work; #else - ModuleBase::WARNING_QUIT("ModuleIO::distri_wfc_nao","check the code without MPI."); + ModuleBase::WARNING_QUIT("ModuleIO::distri_wfc_nao", "check the code without MPI."); #endif return; }