diff --git a/source/Makefile.Objects b/source/Makefile.Objects index 7e9d8f2884..33ac069cf6 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -499,23 +499,27 @@ OBJS_LCAO=DM_gamma.o\ td_velocity.o\ upsi.o\ FORCE_STRESS.o\ - FORCE_gamma.o\ - FORCE_gamma_edm.o\ - FORCE_gamma_tvnl.o\ - FORCE_gamma_vl.o\ - FORCE_k.o\ - foverlap_k.o\ - ftvnl_dphi_k.o\ - fvl_dphi_k.o\ - fvnl_dbeta_k.o\ - LCAO_gen_fixedH.o\ - grid_init.o\ + FORCE_gamma.o\ + FORCE_k.o\ + fvl_dphi_gamma.o\ + fvl_dphi_k.o\ + fedm_gamma.o\ + fedm_k.o\ + ftvnl_dphi_gamma.o\ + ftvnl_dphi_k.o\ + fvnl_dbeta_gamma.o\ + fvnl_dbeta_k.o\ + grid_init.o\ spar_dh.o\ spar_exx.o\ spar_hsr.o\ spar_st.o\ spar_u.o\ LCAO_matrix.o\ + LCAO_set_fs.o\ + LCAO_set_st.o\ + LCAO_nl_mu.o\ + LCAO_nl_beta.o\ LCAO_nnr.o\ center2_orb-orb11.o\ center2_orb-orb21.o\ diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp index c5ff8501db..60f4143e3a 100644 --- a/source/module_esolver/esolver_ks_lcao.cpp +++ b/source/module_esolver/esolver_ks_lcao.cpp @@ -163,9 +163,6 @@ void ESolver_KS_LCAO::before_all_runners(Input& inp, UnitCell& ucell) this->init_basis_lcao(this->orb_con, inp, ucell); //------------------init Basis_lcao---------------------- - // 4) redundant ParaV and LM pointers - this->gen_h.LM = &this->LM; - //! pass basis-pointer to EState and Psi /* Inform: on getting rid of ORB_control and Parallel_Orbitals @@ -381,7 +378,6 @@ void ESolver_KS_LCAO::cal_force(ModuleBase::matrix& force) this->pelec, this->psi, this->LM, - this->gen_h, // mohan add 2024-04-02 this->GG, // mohan add 2024-04-01 this->GK, // mohan add 2024-04-01 uot_, @@ -1433,7 +1429,6 @@ ModuleIO::Output_Mat_Sparse ESolver_KS_LCAO::create_Output_Mat_Spars istep, this->pelec->pot->get_effective_v(), this->orb_con.ParaV, - this->gen_h, // mohan add 2024-04-06 this->GK, // mohan add 2024-04-01 uot_, this->LM, diff --git a/source/module_esolver/esolver_ks_lcao.h b/source/module_esolver/esolver_ks_lcao.h index 429170d683..b77c9d221d 100644 --- a/source/module_esolver/esolver_ks_lcao.h +++ b/source/module_esolver/esolver_ks_lcao.h @@ -75,8 +75,6 @@ namespace ModuleESolver // we will get rid of this class soon, don't use it, mohan 2024-03-28 Local_Orbital_Charge LOC; - LCAO_gen_fixedH gen_h; // mohan add 2024-04-02 - // used for k-dependent grid integration. Gint_k GK; diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp index ae011836e0..9ace2ea048 100644 --- a/source/module_esolver/esolver_ks_lcao_elec.cpp +++ b/source/module_esolver/esolver_ks_lcao_elec.cpp @@ -145,7 +145,6 @@ void ESolver_KS_LCAO::beforesolver(const int istep) this->p_hamilt = new hamilt::HamiltLCAO( GlobalV::GAMMA_ONLY_LOCAL ? &(this->GG) : nullptr, GlobalV::GAMMA_ONLY_LOCAL ? nullptr : &(this->GK), - &(this->gen_h), &(this->LM), &(this->LOC), this->pelec->pot, diff --git a/source/module_esolver/esolver_ks_lcao_tddft.cpp b/source/module_esolver/esolver_ks_lcao_tddft.cpp index a9125f1fde..c3ad0b6ac2 100644 --- a/source/module_esolver/esolver_ks_lcao_tddft.cpp +++ b/source/module_esolver/esolver_ks_lcao_tddft.cpp @@ -91,7 +91,6 @@ void ESolver_KS_LCAO_TDDFT::before_all_runners(Input& inp, UnitCell& ucell) // this part will be updated soon // pass Hamilt-pointer to Operator - this->gen_h.LM = &this->LM; this->LOC.ParaV = this->LM.ParaV;; this->LOWF.ParaV = this->LM.ParaV; @@ -435,8 +434,7 @@ void ESolver_KS_LCAO_TDDFT::after_scf(const int istep) uot_, tmp_DM->get_paraV_pointer(), this->RA, - this->LM, // mohan add 2024-04-02 - this->gen_h); // mohan add 2024-02 + this->LM); // mohan add 2024-04-02 } ESolver_KS_LCAO, double>::after_scf(istep); } diff --git a/source/module_esolver/io_npz.cpp b/source/module_esolver/io_npz.cpp index 535d915016..33891d4977 100644 --- a/source/module_esolver/io_npz.cpp +++ b/source/module_esolver/io_npz.cpp @@ -117,7 +117,7 @@ void ESolver_KS_LCAO::read_mat_npz(std::string& zipname, hamilt::HContai assert(orbital_info[iw*3] == GlobalC::ucell.atoms[it].iw2n[iw]); assert(orbital_info[iw*3+1] == GlobalC::ucell.atoms[it].iw2l[iw]); const int im = GlobalC::ucell.atoms[it].iw2m[iw]; - const int m = (im % 2 == 0) ? -im/2 : (im+1)/2; // copied from LCAO_gen_fixedH.cpp + const int m = (im % 2 == 0) ? -im/2 : (im+1)/2; assert(orbital_info[iw*3+2] == m); } } @@ -413,7 +413,7 @@ void ESolver_KS_LCAO::output_mat_npz(std::string& zipname, const hamilt: orbital_info[iw*3] = GlobalC::ucell.atoms[it].iw2n[iw]; orbital_info[iw*3+1] = GlobalC::ucell.atoms[it].iw2l[iw]; const int im = GlobalC::ucell.atoms[it].iw2m[iw]; - const int m = (im % 2 == 0) ? -im/2 : (im+1)/2; // copied from LCAO_gen_fixedH.cpp + const int m = (im % 2 == 0) ? -im/2 : (im+1)/2; orbital_info[iw*3+2] = m; } shape={(size_t)GlobalC::ucell.atoms[it].nw,3}; diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/CMakeLists.txt b/source/module_hamilt_lcao/hamilt_lcaodft/CMakeLists.txt index ccd1485419..970dc999ef 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/CMakeLists.txt +++ b/source/module_hamilt_lcao/hamilt_lcaodft/CMakeLists.txt @@ -17,15 +17,15 @@ if(ENABLE_LCAO) operator_lcao/dftu_lcao.cpp FORCE_STRESS.cpp FORCE_gamma.cpp - FORCE_gamma_edm.cpp - FORCE_gamma_tvnl.cpp - FORCE_gamma_vl.cpp FORCE_k.cpp - foverlap_k.cpp - ftvnl_dphi_k.cpp + fvl_dphi_gamma.cpp fvl_dphi_k.cpp + fedm_gamma.cpp + fedm_k.cpp + ftvnl_dphi_gamma.cpp + ftvnl_dphi_k.cpp + fvnl_dbeta_gamma.cpp fvnl_dbeta_k.cpp - LCAO_gen_fixedH.cpp grid_init.cpp spar_dh.cpp spar_exx.cpp @@ -34,7 +34,11 @@ if(ENABLE_LCAO) spar_u.cpp LCAO_matrix.cpp LCAO_nnr.cpp - record_adj.cpp + LCAO_set_fs.cpp + LCAO_set_st.cpp + LCAO_nl_mu.cpp + LCAO_nl_beta.cpp + record_adj.cpp center2_orb-orb11.cpp center2_orb-orb21.cpp center2_orb-orb22.cpp diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE.h b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE.h index 5b462f2112..2eb7a1631c 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE.h +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE.h @@ -6,7 +6,6 @@ #include "module_base/matrix.h" #include "module_elecstate/module_dm/density_matrix.h" #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h" -#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.h" #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h" #include "module_psi/psi.h" #include "module_hamilt_lcao/module_gint/gint_gamma.h" @@ -44,8 +43,10 @@ class Force_LCAO elecstate::Potential* pot; // orthonormal force + contribution from T and VNL - void ftable(const bool isforce, + void ftable( + const bool isforce, const bool isstress, + const UnitCell& ucell, const psi::Psi* psi, const elecstate::ElecState* pelec, ModuleBase::matrix& foverlap, @@ -59,7 +60,6 @@ class Force_LCAO #ifdef __DEEPKS ModuleBase::matrix& svnl_dalpha, #endif - LCAO_gen_fixedH& gen_h, // mohan add 2024-04-02 typename TGint::type& gint, const ORB_gen_tables* uot, const Parallel_Orbitals& pv, @@ -71,7 +71,6 @@ class Force_LCAO // get the ds, dt, dvnl. void allocate(const Parallel_Orbitals& pv, LCAO_Matrix& lm, - LCAO_gen_fixedH& gen_h, const ORB_gen_tables* uot, const int& nks = 0, const std::vector>& kvec_d = {}); @@ -88,8 +87,11 @@ class Force_LCAO // forces related to energy density matrix //------------------------------------------------------------- - void cal_foverlap(const bool isforce, + void cal_fedm( + const bool isforce, const bool isstress, + const UnitCell& ucell, + const elecstate::DensityMatrix* dm, const psi::Psi* psi, const Parallel_Orbitals& pv, const elecstate::ElecState* pelec, @@ -97,13 +99,12 @@ class Force_LCAO ModuleBase::matrix& foverlap, ModuleBase::matrix& soverlap, const K_Vectors* kv = nullptr, - Record_adj* ra = nullptr, - const elecstate::DensityMatrix* DM = nullptr); + Record_adj* ra = nullptr); //------------------------------------------------------------- // forces related to kinetic and non-local pseudopotentials //-------------------------------------------------------------- - void cal_ftvnl_dphi(const elecstate::DensityMatrix* DM, + void cal_ftvnl_dphi(const elecstate::DensityMatrix* dm, const Parallel_Orbitals& pv, const UnitCell& ucell, LCAO_Matrix& lm, @@ -113,7 +114,7 @@ class Force_LCAO ModuleBase::matrix& stvnl_dphi, Record_adj* ra = nullptr); - void cal_fvnl_dbeta(const elecstate::DensityMatrix* DM, + void cal_fvnl_dbeta(const elecstate::DensityMatrix* dm, const Parallel_Orbitals& pv, const UnitCell& ucell, const LCAO_Orbitals& orb, diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp index 981cf2af8e..aaf8dca93b 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp @@ -33,7 +33,6 @@ void Force_Stress_LCAO::getForceStress(const bool isforce, const elecstate::ElecState* pelec, const psi::Psi* psi, LCAO_Matrix& lm, - LCAO_gen_fixedH &gen_h, // mohan add 2024-04-02 Gint_Gamma &gint_gamma, // mohan add 2024-04-01 Gint_k &gint_k, // mohan add 2024-04-01 const ORB_gen_tables* uot, @@ -156,7 +155,6 @@ void Force_Stress_LCAO::getForceStress(const bool isforce, #ifdef __DEEPKS svnl_dalpha, #endif - gen_h, // mohan add 2024-04-02 gint_gamma, gint_k, uot, @@ -744,7 +742,6 @@ void Force_Stress_LCAO::integral_part( #if __DEEPKS ModuleBase::matrix& svnl_dalpha, #endif - LCAO_gen_fixedH &gen_h, // mohan add 2024-04-02 Gint_Gamma &gint_gamma, // mohan add 2024-04-01 Gint_k &gint_k, // mohan add 2024-04-01 const ORB_gen_tables* uot, @@ -755,6 +752,7 @@ void Force_Stress_LCAO::integral_part( flk.ftable(isforce, isstress, + GlobalC::ucell, psi, pelec, foverlap, @@ -768,7 +766,6 @@ void Force_Stress_LCAO::integral_part( #if __DEEPKS svnl_dalpha, #endif - gen_h, gint_gamma, uot, pv, @@ -795,7 +792,6 @@ void Force_Stress_LCAO>::integral_part( #if __DEEPKS ModuleBase::matrix& svnl_dalpha, #endif - LCAO_gen_fixedH &gen_h, // mohan add 2024-04-02 Gint_Gamma &gint_gamma, Gint_k &gint_k, const ORB_gen_tables* uot, @@ -805,6 +801,7 @@ void Force_Stress_LCAO>::integral_part( { flk.ftable(isforce, isstress, + GlobalC::ucell, psi, pelec, foverlap, @@ -818,7 +815,6 @@ void Force_Stress_LCAO>::integral_part( #if __DEEPKS svnl_dalpha, #endif - gen_h, gint_k, uot, pv, diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.h b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.h index ced5b66eac..63526e9640 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.h +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.h @@ -36,7 +36,6 @@ class Force_Stress_LCAO const elecstate::ElecState* pelec, const psi::Psi* psi, LCAO_Matrix &lm, - LCAO_gen_fixedH &gen_h, // mohan add 2024-04-02 Gint_Gamma &gint_gamma, // mohan add 2024-04-01 Gint_k &gint_k, // mohan add 2024-04-01 const ORB_gen_tables* uot, @@ -88,7 +87,6 @@ class Force_Stress_LCAO #if __DEEPKS ModuleBase::matrix& svnl_dalpha, #endif - LCAO_gen_fixedH &gen_h, // mohan add 2024-04-02 Gint_Gamma &gint_gamma, Gint_k &gint_k, const ORB_gen_tables* uot, diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp index 7e12a97538..1d8677bb14 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp @@ -9,12 +9,12 @@ #endif #include "module_io/write_HS.h" #include "module_elecstate/elecstate_lcao.h" -#include "module_cell/module_neighbor/sltk_grid_driver.h" //GridD +#include "module_cell/module_neighbor/sltk_grid_driver.h" //GridD +#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h" template<> void Force_LCAO::allocate(const Parallel_Orbitals& pv, LCAO_Matrix& lm, - LCAO_gen_fixedH& gen_h, const ORB_gen_tables* uot, const int& nks, const std::vector>& kvec_d) @@ -67,7 +67,8 @@ void Force_LCAO::allocate(const Parallel_Orbitals& pv, ModuleBase::Memory::record("Stress::dSH_GO", sizeof(double) * pv.nloc * 12); } // calculate dS in LCAO basis - gen_h.build_ST_new( + LCAO_domain::build_ST_new( + lm, 'S', cal_deri, GlobalC::ucell, @@ -90,7 +91,8 @@ void Force_LCAO::allocate(const Parallel_Orbitals& pv, // calculate dT // calculate T + VNL(P1) in LCAO basis - gen_h.build_ST_new( + LCAO_domain::build_ST_new( + lm, 'T', cal_deri, GlobalC::ucell, @@ -100,10 +102,14 @@ void Force_LCAO::allocate(const Parallel_Orbitals& pv, &GlobalC::GridD, lm.Hloc_fixed.data()); - // ModuleBase::timer::tick("Force_LCAO_gamma","build_Nonlocal_mu"); - gen_h.build_Nonlocal_mu_new(lm.Hloc_fixed.data(), cal_deri, GlobalC::ucell, GlobalC::ORB, *uot, &(GlobalC::GridD)); - // ModuleBase::timer::tick("Force_LCAO_gamma","build_Nonlocal_mu"); - // test_gamma(lm.DHloc_fixed_x, "dHloc_fixed_x Vnl part"); + LCAO_domain::build_Nonlocal_mu_new( + lm, + lm.Hloc_fixed.data(), + cal_deri, + GlobalC::ucell, + GlobalC::ORB, + *uot, + &GlobalC::GridD); // calculate asynchronous S matrix to output for Hefei-NAMD if (INPUT.cal_syns) @@ -112,7 +118,8 @@ void Force_LCAO::allocate(const Parallel_Orbitals& pv, lm.zeros_HSgamma('S'); - gen_h.build_ST_new( + LCAO_domain::build_ST_new( + lm, 'S', cal_deri, GlobalC::ucell, @@ -208,8 +215,10 @@ void Force_LCAO::test(Parallel_Orbitals& pv, double* mm, const std::stri // be called in force_lo.cpp template<> -void Force_LCAO::ftable(const bool isforce, +void Force_LCAO::ftable( + const bool isforce, const bool isstress, + const UnitCell& ucell, const psi::Psi* psi, const elecstate::ElecState* pelec, ModuleBase::matrix& foverlap, @@ -223,7 +232,6 @@ void Force_LCAO::ftable(const bool isforce, #ifdef __DEEPKS ModuleBase::matrix& svnl_dalpha, #endif - LCAO_gen_fixedH& gen_h, // mohan add 2024-04-02 TGint::type& gint, const ORB_gen_tables* uot, const Parallel_Orbitals& pv, @@ -235,39 +243,80 @@ void Force_LCAO::ftable(const bool isforce, ModuleBase::timer::tick("Force_LCAO", "ftable"); // get DM - const elecstate::DensityMatrix* DM + const elecstate::DensityMatrix* dm = dynamic_cast*>(pelec)->get_DM(); - this->ParaV = DM->get_paraV_pointer(); - //const Parallel_Orbitals* pv = loc.ParaV; + this->ParaV = dm->get_paraV_pointer(); // allocate DSloc_x, DSloc_y, DSloc_z // allocate DHloc_fixed_x, DHloc_fixed_y, DHloc_fixed_z - this->allocate(pv, lm, gen_h, uot); - - // calculate the 'energy density matrix' here. - this->cal_foverlap(isforce, isstress, psi, pv, pelec, lm, foverlap, soverlap); - - // sum up the density matrix with different spin - // DM->sum_DMR_spin(); - - this->cal_ftvnl_dphi(DM, pv, GlobalC::ucell, lm, isforce, isstress, ftvnl_dphi, stvnl_dphi); - - this->cal_fvnl_dbeta(DM, pv, GlobalC::ucell, GlobalC::ORB, *uot, GlobalC::GridD, isforce, isstress, fvnl_dbeta, svnl_dbeta); - - this->cal_fvl_dphi(isforce, isstress, pelec->pot, gint, fvl_dphi, svl_dphi); + this->allocate( + pv, + lm, + uot); + + // calculate the force related to 'energy density matrix'. + this->cal_fedm( + isforce, + isstress, + ucell, + dm, + psi, + pv, + pelec, + lm, + foverlap, + soverlap); + + this->cal_ftvnl_dphi( + dm, + pv, + ucell, + lm, + isforce, + isstress, + ftvnl_dphi, + stvnl_dphi); + + this->cal_fvnl_dbeta( + dm, + pv, + ucell, + GlobalC::ORB, + *uot, + GlobalC::GridD, + isforce, + isstress, + fvnl_dbeta, + svnl_dbeta); + + this->cal_fvl_dphi( + isforce, + isstress, + pelec->pot, + gint, + fvl_dphi, + svl_dphi); // caoyu add for DeePKS #ifdef __DEEPKS if (GlobalV::deepks_scf) { - const std::vector>& dm_gamma = DM->get_DMK_vector(); - GlobalC::ld.cal_projected_DM(DM, GlobalC::ucell, GlobalC::ORB, GlobalC::GridD); - GlobalC::ld.cal_descriptor(GlobalC::ucell.nat); - GlobalC::ld.cal_gedm(GlobalC::ucell.nat); + const std::vector>& dm_gamma = dm->get_DMK_vector(); + + GlobalC::ld.cal_projected_DM( + dm, + ucell, + GlobalC::ORB, + GlobalC::GridD); + + GlobalC::ld.cal_descriptor(ucell.nat); + + GlobalC::ld.cal_gedm(ucell.nat); + GlobalC::ld.cal_f_delta_gamma( dm_gamma, - GlobalC::ucell, + ucell, GlobalC::ORB, GlobalC::GridD, isstress, @@ -275,6 +324,7 @@ void Force_LCAO::ftable(const bool isforce, #ifdef __MPI Parallel_Reduce::reduce_all(GlobalC::ld.F_delta.c, GlobalC::ld.F_delta.nr * GlobalC::ld.F_delta.nc); + if (isstress) { Parallel_Reduce::reduce_pool(svnl_dalpha.c, svnl_dalpha.nr * svnl_dalpha.nc); @@ -285,7 +335,7 @@ void Force_LCAO::ftable(const bool isforce, { GlobalC::ld.print_dm(dm_gamma[0]); GlobalC::ld.check_projected_dm(); - GlobalC::ld.check_descriptor(GlobalC::ucell); + GlobalC::ld.check_descriptor(ucell); GlobalC::ld.check_gedm(); GlobalC::ld.cal_e_delta_band(dm_gamma); @@ -293,7 +343,7 @@ void Force_LCAO::ftable(const bool isforce, ofs << std::setprecision(10) << GlobalC::ld.e_delta_band; std::ofstream ofs1("E_delta.dat"); ofs1 << std::setprecision(10) << GlobalC::ld.E_delta; - GlobalC::ld.check_f_delta(GlobalC::ucell.nat, svnl_dalpha); + GlobalC::ld.check_f_delta(ucell.nat, svnl_dalpha); } } #endif @@ -331,8 +381,10 @@ void stress_fill(const double& lat0_, const double& omega_, ModuleBase::matrix& { for (int j = 0; j < 3; ++j) { - if (j > i) - stress_matrix(j, i) = stress_matrix(i, j); + if (j > i) + { + stress_matrix(j, i) = stress_matrix(i, j); + } stress_matrix(i, j) *= weight; } } diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp index 8a8274d12b..41772c4fd7 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp @@ -13,6 +13,7 @@ #include "module_elecstate/elecstate_lcao.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" #include "module_io/write_HS.h" +#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h" #ifdef __DEEPKS #include "module_hamilt_lcao/module_deepks/LCAO_deepks.h" @@ -25,7 +26,6 @@ template<> void Force_LCAO>::allocate(const Parallel_Orbitals& pv, LCAO_Matrix& lm, - LCAO_gen_fixedH& gen_h, const ORB_gen_tables* uot, const int& nks, const std::vector>& kvec_d) @@ -89,7 +89,8 @@ void Force_LCAO>::allocate(const Parallel_Orbitals& pv, // calculate dS = //----------------------------- bool cal_deri = true; - gen_h.build_ST_new( + LCAO_domain::build_ST_new( + lm, 'S', cal_deri, GlobalC::ucell, @@ -97,7 +98,7 @@ void Force_LCAO>::allocate(const Parallel_Orbitals& pv, pv, *uot, &GlobalC::GridD, - gen_h.LM->SlocR.data()); + lm.SlocR.data()); //----------------------------------------- // (2) allocate for @@ -120,7 +121,8 @@ void Force_LCAO>::allocate(const Parallel_Orbitals& pv, // calculate dT= in LCAO // calculate T + VNL(P1) in LCAO basis - gen_h.build_ST_new( + LCAO_domain::build_ST_new( + lm, 'T', cal_deri, GlobalC::ucell, @@ -128,17 +130,26 @@ void Force_LCAO>::allocate(const Parallel_Orbitals& pv, pv, *uot, &GlobalC::GridD, - gen_h.LM->Hloc_fixedR.data()); + lm.Hloc_fixedR.data()); // calculate dVnl= in LCAO - gen_h.build_Nonlocal_mu_new(gen_h.LM->Hloc_fixed.data(), cal_deri, GlobalC::ucell, GlobalC::ORB, *uot, &(GlobalC::GridD)); + LCAO_domain::build_Nonlocal_mu_new( + lm, + lm.Hloc_fixed.data(), + cal_deri, + GlobalC::ucell, + GlobalC::ORB, + *uot, + &GlobalC::GridD); - // calculate asynchronous S matrix to output for Hefei-NAMD - if (INPUT.cal_syns) - { - cal_deri = false; + // calculate asynchronous S matrix to output for Hefei-NAMD + if (INPUT.cal_syns) + { + cal_deri = false; - gen_h.build_ST_new('S', + LCAO_domain::build_ST_new( + lm, + 'S', cal_deri, GlobalC::ucell, GlobalC::ORB, @@ -291,6 +302,7 @@ void Force_LCAO>::test( template<> void Force_LCAO>::ftable(const bool isforce, const bool isstress, + const UnitCell& ucell, const psi::Psi>* psi, const elecstate::ElecState* pelec, ModuleBase::matrix& foverlap, @@ -304,7 +316,6 @@ void Force_LCAO>::test( #ifdef __DEEPKS ModuleBase::matrix& svnl_dalpha, #endif - LCAO_gen_fixedH& gen_h, TGint>::type& gint, const ORB_gen_tables* uot, const Parallel_Orbitals& pv, @@ -315,22 +326,23 @@ void Force_LCAO>::test( ModuleBase::TITLE("Force_LCAO_k", "ftable_k"); ModuleBase::timer::tick("Force_LCAO_k", "ftable_k"); - elecstate::DensityMatrix, double>* DM + elecstate::DensityMatrix, double>* dm = dynamic_cast>*>(pelec)->get_DM(); this->allocate( pv, lm, - gen_h, uot, kv->get_nks(), kv->kvec_d); // calculate the energy density matrix // and the force related to overlap matrix and energy density matrix. - this->cal_foverlap( + this->cal_fedm( isforce, isstress, + ucell, + dm, psi, pv, pelec, @@ -338,13 +350,12 @@ void Force_LCAO>::test( foverlap, soverlap, kv, - ra, - DM); + ra); this->cal_ftvnl_dphi( - DM, + dm, pv, - GlobalC::ucell, + ucell, lm, isforce, isstress, @@ -362,9 +373,9 @@ void Force_LCAO>::test( svl_dphi); this->cal_fvnl_dbeta( - DM, + dm, pv, - GlobalC::ucell, + ucell, GlobalC::ORB, *uot, GlobalC::GridD, @@ -376,13 +387,16 @@ void Force_LCAO>::test( #ifdef __DEEPKS if (GlobalV::deepks_scf) { - const std::vector>>& dm_k = DM->get_DMK_vector(); - GlobalC::ld.cal_projected_DM_k(DM, GlobalC::ucell, GlobalC::ORB, GlobalC::GridD); - GlobalC::ld.cal_descriptor(GlobalC::ucell.nat); - GlobalC::ld.cal_gedm(GlobalC::ucell.nat); + const std::vector>>& dm_k = dm->get_DMK_vector(); + + GlobalC::ld.cal_projected_DM_k(dm, ucell, GlobalC::ORB, GlobalC::GridD); + + GlobalC::ld.cal_descriptor(ucell.nat); + + GlobalC::ld.cal_gedm(ucell.nat); GlobalC::ld.cal_f_delta_k(dm_k, - GlobalC::ucell, + ucell, GlobalC::ORB, GlobalC::GridD, kv->get_nks(), diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h index 25133c8b52..07ed0cfa2e 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h +++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h @@ -1,24 +1,177 @@ #ifndef LCAO_DOMAIN_H #define LCAO_DOMAIN_H -#include "module_cell/module_neighbor/sltk_atom_arrange.h" -#include "module_cell/module_neighbor/sltk_grid_driver.h" -#include "module_hamilt_pw/hamilt_pwdft/global.h" -#include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h" +#include "module_hamilt_lcao/module_gint/grid_technique.h" +#include "module_hamilt_lcao/module_gint/gint_gamma.h" +#include "module_hamilt_lcao/module_gint/gint_k.h" +#include "module_base/global_function.h" +#include "module_base/global_variable.h" +#include "module_basis/module_ao/ORB_gen_tables.h" +#include "module_cell/module_neighbor/sltk_grid_driver.h" +#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h" +#include "module_basis/module_ao/ORB_gen_tables.h" +#include "module_base/vector3.h" namespace LCAO_domain { -//! prepare grid integration -void grid_prepare( - const Grid_Technique& gt, - Gint_Gamma &gint_gamma, - Gint_k &gint_k, - const ModulePW::PW_Basis& rhopw, - const ModulePW::PW_Basis_Big& bigpw); + // can used in gamma algorithm. + void build_Nonlocal_beta_new( + LCAO_Matrix &lm, + double* Hloc, + const UnitCell& ucell, + const LCAO_Orbitals& orb, + const ORB_gen_tables& uot, + Grid_Driver* GridD); + + + void build_Nonlocal_mu_new( + LCAO_Matrix &lm, + double* HlocR, + const bool& calc_deri, + const UnitCell& ucell, + const LCAO_Orbitals& orb, + const ORB_gen_tables& uot, + Grid_Driver* GridD); + + /** + * @brief prepare gird integration + */ + void grid_prepare( + const Grid_Technique& gt, + Gint_Gamma &gint_gamma, + Gint_k &gint_k, + const ModulePW::PW_Basis& rhopw, + const ModulePW::PW_Basis_Big& bigpw); + + /** + * @brief set the elements of force-related matrices in LCAO method + */ + void set_force ( + const Parallel_Orbitals &pv, + const int& iw1_all, + const int& iw2_all, + const double& vx, + const double& vy, + const double& vz, + const char &dtype, + double* dsloc_x, + double* dsloc_y, + double* dsloc_z, + double* dhloc_fixed_x, + double* dhloc_fixed_y, + double* dhloc_fixed_z); + + /** + * @brief set the elements of stress-related matrices in LCAO method + */ + void set_stress ( + const Parallel_Orbitals &pv, + const int& iw1_all, + const int& iw2_all, + const double& vx, + const double& vy, + const double& vz, + const char &dtype, + const ModuleBase::Vector3 &dtau, + double* dsloc_11, + double* dsloc_12, + double* dsloc_13, + double* dsloc_22, + double* dsloc_23, + double* dsloc_33, + double* dhloc_fixed_11, + double* dhloc_fixed_12, + double* dhloc_fixed_13, + double* dhloc_fixed_22, + double* dhloc_fixed_23, + double* dhloc_fixed_33); + + /** + * @brief set each element without derivatives + */ + void single_overlap( + LCAO_Matrix& lm, + const LCAO_Orbitals& orb, + const ORB_gen_tables& uot, + const Parallel_Orbitals& pv, + const UnitCell& ucell, + const int nspin, + const bool cal_stress, + const int iw1_all, + const int iw2_all, + const int m1, + const int m2, + const char &dtype, + const int T1, + const int L1, + const int N1, + const int T2, + const int L2, + const int N2, + const ModuleBase::Vector3 &dtau, + const ModuleBase::Vector3 &tau1, + const ModuleBase::Vector3 &tau2, + const int npol, + const int jj, + const int jj0, + const int kk, + const int kk0, + int& nnr, // output value + int& total_nnr, // output value + double *olm, // output value + double *HSloc); // output value + /** + * @brief set each element of T matrices + */ + void single_derivative( + LCAO_Matrix& lm, + const LCAO_Orbitals& orb, + const ORB_gen_tables& uot, + const Parallel_Orbitals& pv, + const UnitCell& ucell, + const int nspin, + const bool cal_stress, + const int iw1_all, + const int iw2_all, + const int m1, + const int m2, + const char &dtype, + const int T1, + const int L1, + const int N1, + const int T2, + const int L2, + const int N2, + const ModuleBase::Vector3 &dtau, + const ModuleBase::Vector3 &tau1, + const ModuleBase::Vector3 &tau2, + const int npol, + const int jj, + const int jj0, + const int kk, + const int kk0, + int& nnr, // output value + int& total_nnr, // output value + double *olm); // output value + /** + * @brief set the elements of S and T matrices + */ + void build_ST_new( + LCAO_Matrix& lm, + const char& dtype, + const bool& cal_deri, + const UnitCell& ucell, + const LCAO_Orbitals& orb, + const Parallel_Orbitals& pv, + const ORB_gen_tables& uot, + Grid_Driver* GridD, + double* SHlocR, + bool cal_syns = false, + double dmax = 0.0); } #endif diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.cpp deleted file mode 100644 index f8e62ecdff..0000000000 --- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.cpp +++ /dev/null @@ -1,1321 +0,0 @@ -#include "LCAO_gen_fixedH.h" -#include "module_hamilt_pw/hamilt_pwdft/global.h" -#include "module_hamilt_pw/hamilt_pwdft/wavefunc.h" -#include "module_cell/module_neighbor/sltk_grid_driver.h" -#include -#include -#include -#include "module_base/timer.h" - -#ifdef __MKL -#include -#endif - -#ifdef _OPENMP -#include -#endif - -LCAO_gen_fixedH::LCAO_gen_fixedH() -{} - -LCAO_gen_fixedH::~LCAO_gen_fixedH() -{} - - -void LCAO_gen_fixedH::build_ST_new(const char& dtype, - const bool& calc_deri, - const UnitCell &ucell, - const LCAO_Orbitals& orb, - const Parallel_Orbitals& pv, - const ORB_gen_tables& uot, - Grid_Driver* GridD, - double* HSloc, - bool cal_syns, - double dmax) -{ - ModuleBase::TITLE("LCAO_gen_fixedH","build_ST_new"); - ModuleBase::timer::tick("LCAO_gen_fixedH","build_ST_new"); - - const int nspin = GlobalV::NSPIN; - const int npol = GlobalV::NPOL; - const bool cal_stress = GlobalV::CAL_STRESS; - const bool gamma_only_local = GlobalV::GAMMA_ONLY_LOCAL; - - int total_nnr = 0; -#ifdef _OPENMP -#pragma omp parallel reduction(+:total_nnr) -{ -#endif - //array to store data - double olm[3]={0.0,0.0,0.0}; - - //\sum{T} e**{ikT} <\phi_{ia}|d\phi_{k\beta}(T)> - ModuleBase::Vector3 tau1, tau2, dtau; - ModuleBase::Vector3 dtau1, dtau2, tau0; -#ifdef _OPENMP -// use schedule(dynamic) for load balancing because adj_num is various -#pragma omp for schedule(dynamic) -#endif - for (int iat1 = 0; iat1 < ucell.nat; iat1++) - { - const int T1 = ucell.iat2it[iat1]; - const Atom* atom1 = &ucell.atoms[T1]; - const int I1 = ucell.iat2ia[iat1]; - { - tau1 = atom1->tau[I1]; - - //GridD->Find_atom(tau1); - AdjacentAtomInfo adjs; - GridD->Find_atom(ucell, tau1, T1, I1, &adjs); - // Record_adj.for_2d() may not called in some case - int nnr = pv.nlocstart ? pv.nlocstart[iat1] : 0; - - if (cal_syns) - { - for (int k = 0; k < 3; k++) - { - tau1[k] = tau1[k] - atom1->vel[I1][k] * INPUT.mdp.md_dt / ucell.lat0 ; - } - } - - for (int ad = 0; ad < adjs.adj_num+1; ++ad) - { - const int T2 = adjs.ntype[ad]; - const int I2 = adjs.natom[ad]; - Atom* atom2 = &ucell.atoms[T2]; - tau2 = adjs.adjacent_tau[ad]; - dtau = tau2 - tau1; - double distance = dtau.norm() * ucell.lat0; - double rcut = orb.Phi[T1].getRcut() + orb.Phi[T2].getRcut(); - - if(distance < rcut) - { - int iw1_all = ucell.itiaiw2iwt( T1, I1, 0) ; //iw1_all = combined index (it, ia, iw) - - for(int jj=0; jjnw*npol; ++jj) - { - const int jj0 = jj/npol; - const int L1 = atom1->iw2l[jj0]; - const int N1 = atom1->iw2n[jj0]; - const int m1 = atom1->iw2m[jj0]; - - int iw2_all = ucell.itiaiw2iwt( T2, I2, 0);//zhengdy-soc - for(int kk=0; kknw*npol; ++kk) - { - const int kk0 = kk/npol; - const int L2 = atom2->iw2l[kk0]; - const int N2 = atom2->iw2n[kk0]; - const int m2 = atom2->iw2m[kk0]; - - // mohan add 2010-06-29 - // this is in fact the same as in build_Nonlocal_mu, - // the difference is that here we use {L,N,m} for ccycle, - // build_Nonlocal_mu use atom.nw for cycle. - // so, here we use ParaO::in_this_processor, - // in build_Non... use global2local_row - // and global2local_col directly, - if (!pv.in_this_processor(iw1_all, iw2_all)) - { - ++iw2_all; - continue; - } - - olm[0] = olm[1] = olm[2] = 0.0; - - if(!calc_deri) - { - // mohan add 2021-03-30 -#ifdef USE_NEW_TWO_CENTER - //================================================================= - // new two-center integral (temporary) - //================================================================= - // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l) - int M1 = (m1 % 2 == 0) ? -m1/2 : (m1+1)/2; - int M2 = (m2 % 2 == 0) ? -m2/2 : (m2+1)/2; - - switch (dtype) - { - case 'S': - uot.two_center_bundle->overlap_orb->calculate(T1, L1, N1, M1, - T2, L2, N2, M2, dtau * ucell.lat0, olm); - break; - case 'T': - uot.two_center_bundle->kinetic_orb->calculate(T1, L1, N1, M1, - T2, L2, N2, M2, dtau * ucell.lat0, olm); - break; - default: // not supposed to happen - ModuleBase::WARNING_QUIT("LCAO_gen_fixedH::build_ST_new","dtype must be S or T"); - } -#else - uot.snap_psipsi( orb, olm, 0, dtype, - tau1, T1, L1, m1, N1, // info of atom1 - adjs.adjacent_tau[ad], T2, L2, m2, N2, // info of atom2 - cal_syns, - dmax); -#endif - //================================================================= - // end of new two-center integral (temporary) - //================================================================= - - // When NSPIN == 4 , only diagonal term is calculated for T or S Operators - // use olm1 to store the diagonal term with complex data type. - std::complex olm1[4]; - if(nspin == 4) - { - olm1[0] = std::complex(olm[0], 0.0); - olm1[1] = ModuleBase::ZERO; - olm1[2] = ModuleBase::ZERO; - olm1[3] = std::complex(olm[0], 0.0); - } - - if(gamma_only_local) - { - // mohan add 2010-06-29 - // set the value in Hloc and Sloc - // according to global2local_row and global2local_col - // the last paramete: 1 for Sloc, 2 for Hloc - // and 3 for Hloc_fixed. - this->LM->set_HSgamma(iw1_all, iw2_all, olm[0], HSloc); - } - else // k point algorithm - { - // mohan add 2010-10 - // set the values in SlocR and Hloc_fixedR. - // which is a 1D array. - if(dtype=='S') - { - if (nspin == 1 || nspin ==2) - { - HSloc[nnr] = olm[0]; - } - else if (nspin == 4) - {//only has diagonal term here. - int is = (jj-jj0*npol) + (kk-kk0*npol)*2; - // SlocR_soc is a temporary array with complex data type, it will be refactor soon. - this->LM->SlocR_soc[nnr] = olm1[is]; - } - else - { - ModuleBase::WARNING_QUIT("LCAO_gen_fixedH::build_ST_new","nspin must be 1, 2 or 4"); - } - } - else if(dtype=='T') - { - if(nspin == 1 || nspin ==2) - { - HSloc[nnr] = olm[0];// - } - else if (nspin == 4) - {//only has diagonal term here. - int is = (jj-jj0*npol) + (kk-kk0*npol)*2; - // Hloc_fixedR_soc is a temporary array with complex data type, it will be refactor soon. - this->LM->Hloc_fixedR_soc[nnr] = olm1[is]; - } - else - { - ModuleBase::WARNING_QUIT("LCAO_gen_fixedH::build_ST_new","nspin must be 1, 2 or 4"); - } - } - ++total_nnr; - ++nnr; - } - } - else // calculate the derivative - { -#ifdef USE_NEW_TWO_CENTER - //================================================================= - // new two-center integral (temporary) - //================================================================= - // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l) - int M1 = (m1 % 2 == 0) ? -m1/2 : (m1+1)/2; - int M2 = (m2 % 2 == 0) ? -m2/2 : (m2+1)/2; - switch (dtype) - { - case 'S': - uot.two_center_bundle->overlap_orb->calculate( - T1, - L1, - N1, - M1, - T2, - L2, - N2, - M2, - dtau * ucell.lat0, - nullptr, - olm); - break; - case 'T': - uot.two_center_bundle->kinetic_orb->calculate( - T1, - L1, - N1, - M1, - T2, - L2, - N2, - M2, - dtau * ucell.lat0, - nullptr, - olm); - break; - default: // not supposed to happen - ModuleBase::WARNING_QUIT("LCAO_gen_fixedH::build_ST_new","dtype must be S or T"); - } -#else - uot.snap_psipsi( orb, olm, 1, dtype, - tau1, T1, L1, m1, N1, - adjs.adjacent_tau[ad], T2, L2, m2, N2 - ); - -#endif - //================================================================= - // end of new two-center integral (temporary) - //================================================================= - - if(gamma_only_local) - { - this->LM->set_force( - pv, - iw1_all, - iw2_all, - olm[0], - olm[1], - olm[2], - dtype, - this->LM->DSloc_x, - this->LM->DSloc_y, - this->LM->DSloc_z, - this->LM->DHloc_fixed_x, - this->LM->DHloc_fixed_y, - this->LM->DHloc_fixed_z); - - if(cal_stress) - { - this->LM->set_stress( - pv, - iw1_all, - iw2_all, - olm[0], - olm[1], - olm[2], - dtype, - dtau, - this->LM->DSloc_11, - this->LM->DSloc_12, - this->LM->DSloc_13, - this->LM->DSloc_22, - this->LM->DSloc_23, - this->LM->DSloc_33, - this->LM->DHloc_fixed_11, - this->LM->DHloc_fixed_12, - this->LM->DHloc_fixed_13, - this->LM->DHloc_fixed_22, - this->LM->DHloc_fixed_23, - this->LM->DHloc_fixed_33); - } - } - else // k point algorithm - { - if(dtype=='S') - { - if (nspin == 1 || nspin ==2) - { - this->LM->DSloc_Rx[nnr] = olm[0]; - this->LM->DSloc_Ry[nnr] = olm[1]; - this->LM->DSloc_Rz[nnr] = olm[2]; - } - else if (nspin == 4) - { - int is = (jj-jj0*npol) + (kk-kk0*npol)*2; - if (is == 0) // is==3 is not needed in force calculation - { - this->LM->DSloc_Rx[nnr] = olm[0]; - this->LM->DSloc_Ry[nnr] = olm[1]; - this->LM->DSloc_Rz[nnr] = olm[2]; - } - else - { - this->LM->DSloc_Rx[nnr] = 0.0; - this->LM->DSloc_Ry[nnr] = 0.0; - this->LM->DSloc_Rz[nnr] = 0.0; - } - } - else - { - ModuleBase::WARNING_QUIT("LCAO_gen_fixedH::build_ST_new","nspin must be 1, 2 or 4"); - } - if(cal_stress) - { - this->LM->DH_r[nnr*3] = dtau.x; - this->LM->DH_r[nnr*3 + 1] = dtau.y; - this->LM->DH_r[nnr*3 + 2] = dtau.z; - } - } - else if(dtype=='T') - { - // notice the 'sign' - if (nspin == 1 || nspin ==2) - { - this->LM->DHloc_fixedR_x[nnr] = olm[0]; - this->LM->DHloc_fixedR_y[nnr] = olm[1]; - this->LM->DHloc_fixedR_z[nnr] = olm[2]; - if(cal_stress) - { - this->LM->stvnl11[nnr] = olm[0] * dtau.x; - this->LM->stvnl12[nnr] = olm[0] * dtau.y; - this->LM->stvnl13[nnr] = olm[0] * dtau.z; - this->LM->stvnl22[nnr] = olm[1] * dtau.y; - this->LM->stvnl23[nnr] = olm[1] * dtau.z; - this->LM->stvnl33[nnr] = olm[2] * dtau.z; - } - } - else if (nspin == 4) - { - int is = (jj-jj0*npol) + (kk-kk0*npol)*2; - if (is == 0) // is==3 is not needed in force calculation - { - this->LM->DHloc_fixedR_x[nnr] = olm[0]; - this->LM->DHloc_fixedR_y[nnr] = olm[1]; - this->LM->DHloc_fixedR_z[nnr] = olm[2]; - if(cal_stress) - { - this->LM->stvnl11[nnr] = olm[0] * dtau.x; - this->LM->stvnl12[nnr] = olm[0] * dtau.y; - this->LM->stvnl13[nnr] = olm[0] * dtau.z; - this->LM->stvnl22[nnr] = olm[1] * dtau.y; - this->LM->stvnl23[nnr] = olm[1] * dtau.z; - this->LM->stvnl33[nnr] = olm[2] * dtau.z; - } - } - else if (is == 1 || is == 2 || is == 3) - { - this->LM->DHloc_fixedR_x[nnr] = 0.0; - this->LM->DHloc_fixedR_y[nnr] = 0.0; - this->LM->DHloc_fixedR_z[nnr] = 0.0; - if(cal_stress) - { - this->LM->stvnl11[nnr] = 0.0; - this->LM->stvnl12[nnr] = 0.0; - this->LM->stvnl13[nnr] = 0.0; - this->LM->stvnl22[nnr] = 0.0; - this->LM->stvnl23[nnr] = 0.0; - this->LM->stvnl33[nnr] = 0.0; - } - } - else - { - ModuleBase::WARNING_QUIT("LCAO_gen_fixedH::build_ST_new","is must be 0, 1, 2, 3"); - } - } - else - { - ModuleBase::WARNING_QUIT("LCAO_gen_fixedH::build_ST_new","nspin must be 1, 2 or 4"); - } - } - ++total_nnr; - ++nnr; - } - } - ++iw2_all; - }// nw2 - ++iw1_all; - }// nw1 - }// distance - else if(distance>=rcut && (!gamma_only_local)) - { - int start1 = ucell.itiaiw2iwt( T1, I1, 0); - int start2 = ucell.itiaiw2iwt( T2, I2, 0); - - bool is_adj = false; - for (int ad0=0; ad0 < adjs.adj_num+1; ++ad0) - { - const int T0 = adjs.ntype[ad0]; - //const int I0 = GridD->getNatom(ad0); - //const int iat0 = ucell.itia2iat(T0, I0); - //const int start0 = ucell.itiaiw2iwt(T0, I0, 0); - tau0 = adjs.adjacent_tau[ad0]; - dtau1 = tau0 - tau1; - double distance1 = dtau1.norm() * ucell.lat0; - double rcut1 = orb.Phi[T1].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max(); - dtau2 = tau0 - tau2; - double distance2 = dtau2.norm() * ucell.lat0; - double rcut2 = orb.Phi[T2].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max(); - if( distance1 < rcut1 && distance2 < rcut2 ) - { - is_adj = true; - break; - } - }//ad0 - - - if( is_adj ) - { - for(int jj=0; jjnw * npol; ++jj) - { - const int mu = pv.global2local_row(start1 + jj); - if(mu<0)continue; - for(int kk=0; kknw * npol; ++kk) - { - const int nu = pv.global2local_col(start2 + kk); - if(nu<0)continue; - ++total_nnr; - ++nnr; - }//kk - }//jj - } - }//distance - }// ad - }// I1 - }// T1 -#ifdef _OPENMP -} -#endif - - if(!gamma_only_local) - { - if(total_nnr != pv.nnr) - { - std::cout << " nnr=" << total_nnr << " LNNR.nnr=" << pv.nnr << std::endl; - GlobalV::ofs_running << " nnr=" << total_nnr << " LNNR.nnr=" << pv.nnr << std::endl; - ModuleBase::WARNING_QUIT("LCAO_gen_fixedH::build_ST_new","nnr != LNNR.nnr"); - } - } - - ModuleBase::timer::tick("LCAO_gen_fixedH","build_ST_new"); - return; -} - -typedef std::tuple key_tuple; - -#include "record_adj.h" //mohan add 2012-07-06 -void LCAO_gen_fixedH::build_Nonlocal_mu_new(double* NLloc, - const bool &calc_deri, - const UnitCell &ucell, - const LCAO_Orbitals &orb, - const ORB_gen_tables &uot, - Grid_Driver* GridD) -{ - ModuleBase::TITLE("LCAO_gen_fixedH","b_NL_mu_new"); - ModuleBase::timer::tick("LCAO_gen_fixedH", "b_NL_mu_new"); - const Parallel_Orbitals* pv = this->LM->ParaV; - - const int nspin = GlobalV::NSPIN; - const int npol = GlobalV::NPOL; - const bool gamma_only_local = GlobalV::GAMMA_ONLY_LOCAL; - - // < phi1 | beta > < beta | phi2 > - // phi1 is within the unitcell. - // while beta is in the supercell. - // while phi2 is in the supercell. - - - //Step 1 : generate - - //This is the data structure for storing - //It is a 4 layer data structure - //The outmost layer is std::vector with size being number of atoms in unit cell - //The second layer is a map, the key being a combination of 4 number (iat, dRx, dRy, dRz) - //which identifies a unique adjacent atom of the first atom - //The third layer is an unordered map, with key being the index of atomic basis |psi> - //The inner layer is a vector, each element representing a projector |beta> - //It then either stores the number (nlm_tot) - //or a vector of 4, storing additionally (nlm_tot1) x_i=x,y,z - std::vector>>> nlm_tot; - std::vector>>>> nlm_tot1; - - if(!calc_deri) - { - nlm_tot.resize(ucell.nat); - } - else - { - nlm_tot1.resize(ucell.nat); - } -#ifdef _OPENMP -#pragma omp parallel for schedule(dynamic) -#endif - for(int iat=0;iat tau = ucell.atoms[it].tau[ia]; - AdjacentAtomInfo adjs; - GridD->Find_atom(ucell, tau ,it, ia, &adjs); - - if(!calc_deri) - { - nlm_tot[iat].clear(); - } - else - { - nlm_tot1[iat].clear(); - } - - for (int ad=0; ad &tau1 = adjs.adjacent_tau[ad]; - const Atom* atom1 = &ucell.atoms[T1]; - const int nw1_tot = atom1->nw*npol; - - const ModuleBase::Vector3 dtau = tau1-tau; - const double dist1 = dtau.norm2() * pow(ucell.lat0,2); - - if (dist1 > pow(Rcut_Beta + Rcut_AO1,2)) - { - continue; - } - std::unordered_map> nlm_cur; - std::unordered_map>> nlm_cur1; - - if(!calc_deri) - { - nlm_cur.clear(); - } - else - { - nlm_cur1.clear(); - } - for (int iw1=0; iw1global2local_row(iw1_all); - const int iw2_local = pv->global2local_col(iw1_all); - if(iw1_local < 0 && iw2_local < 0)continue; - const int iw1_0 = iw1/npol; - std::vector> nlm; - //nlm is a vector of vectors, but size of outer vector is only 1 here - //If we are calculating force, we need also to store the gradient - //and size of outer vector is then 4 - //inner loop : all projectors (L0,M0) - -#ifdef USE_NEW_TWO_CENTER - //================================================================= - // new two-center integral (temporary) - //================================================================= - int L1 = atom1->iw2l[ iw1_0 ]; - int N1 = atom1->iw2n[ iw1_0 ]; - int m1 = atom1->iw2m[ iw1_0 ]; - - // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l) - int M1 = (m1 % 2 == 0) ? -m1/2 : (m1+1)/2; - - ModuleBase::Vector3 dtau = tau - tau1; - uot.two_center_bundle->overlap_orb_beta->snap( - T1, L1, N1, M1, it, dtau * ucell.lat0, calc_deri, nlm); -#else - uot.snap_psibeta_half( - orb, - ucell.infoNL, - nlm, tau1, T1, - atom1->iw2l[ iw1_0 ], // L1 - atom1->iw2m[ iw1_0 ], // m1 - atom1->iw2n[ iw1_0 ], // N1 - tau, it, calc_deri); //R0,T0 -#endif - //================================================================= - // end of new two-center integral (temporary) - //================================================================= - - if(!calc_deri) - { - nlm_cur.insert({iw1_all,nlm[0]}); - } - else - { - nlm_cur1.insert({iw1_all,nlm}); - } - }//end iw - - const int iat1=ucell.itia2iat(T1, I1); - const int rx1=adjs.box[ad].x; - const int ry1=adjs.box[ad].y; - const int rz1=adjs.box[ad].z; - key_tuple key_1(iat1,rx1,ry1,rz1); - - if(!calc_deri) - { - nlm_tot[iat][key_1]=nlm_cur; - } - else - { - nlm_tot1[iat][key_1]=nlm_cur1; - } - }//end ad - } - //======================================================= - //Step2: - //calculate sum_(L0,M0) beta - //and accumulate the value to Hloc_fixedR(i,j) - //======================================================= - int total_nnr = 0; -#ifdef _OPENMP -#pragma omp parallel reduction(+:total_nnr) -{ -#endif - ModuleBase::Vector3 tau1, tau2, dtau; - ModuleBase::Vector3 dtau1, dtau2, tau0; - double distance = 0.0; - double rcut = 0.0; - double rcut1 = 0.0; - double rcut2 = 0.0; - - // Record_adj RA; - // RA.for_2d(); - - // psi1 -#ifdef _OPENMP -// use schedule(dynamic) for load balancing because adj_num is various -#pragma omp for schedule(dynamic) -#endif - for (int iat1 = 0; iat1 < ucell.nat; iat1++) - { - const int T1 = ucell.iat2it[iat1]; - const Atom* atom1 = &ucell.atoms[T1]; - const int I1 = ucell.iat2ia[iat1]; - { - //GridD->Find_atom( atom1->tau[I1] ); - AdjacentAtomInfo adjs; - GridD->Find_atom(ucell, atom1->tau[I1] ,T1, I1, &adjs); - const int start1 = ucell.itiaiw2iwt(T1, I1, 0); - // Record_adj.for_2d() may not called in some case - int nnr = pv->nlocstart ? pv->nlocstart[iat1] : 0; - tau1 = atom1->tau[I1]; - - // psi2 - for (int ad2=0; ad2= rcut) - { - for (int ad0 = 0; ad0 < adjs.adj_num+1; ++ad0) - { - const int T0 = adjs.ntype[ad0]; - //const int I0 = GridD->getNatom(ad0); - //const int T0 = RA.info[iat1][ad0][3]; - //const int I0 = RA.info[iat1][ad0][4]; - //const int iat0 = ucell.itia2iat(T0, I0); - //const int start0 = ucell.itiaiw2iwt(T0, I0, 0); - - tau0 = adjs.adjacent_tau[ad0]; - dtau1 = tau0 - tau1; - dtau2 = tau0 - tau2; - - const double distance1 = dtau1.norm2() * pow(ucell.lat0,2); - const double distance2 = dtau2.norm2() * pow(ucell.lat0,2); - - rcut1 = pow(orb.Phi[T1].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max(),2); - rcut2 = pow(orb.Phi[T2].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max(),2); - - if( distance1 < rcut1 && distance2 < rcut2 ) - { - is_adj = true; - break; - } - } - } - - - if(is_adj) - { - // < psi1 | all projectors | psi2 > - // ----------------------------- enter the nnr increaing zone ------------------------- - for (int ad0=0; ad0 < adjs.adj_num+1 ; ++ad0) - { - const int T0 = adjs.ntype[ad0]; - const int I0 = adjs.natom[ad0]; - const int iat = ucell.itia2iat(T0,I0); - - // mohan add 2010-12-19 - if( ucell.infoNL.nproj[T0] == 0) - { - continue; - } - - //const int I0 = GridD->getNatom(ad0); - //const int start0 = ucell.itiaiw2iwt(T0, I0, 0); - tau0 = adjs.adjacent_tau[ad0]; - - dtau1 = tau0 - tau1; - dtau2 = tau0 - tau2; - const double distance1 = dtau1.norm2() * pow(ucell.lat0,2); - const double distance2 = dtau2.norm2() * pow(ucell.lat0,2); - - // seems a bug here!! mohan 2011-06-17 - rcut1 = pow(orb.Phi[T1].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max(),2); - rcut2 = pow(orb.Phi[T2].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max(),2); - - if(distance1 >= rcut1 || distance2 >= rcut2) - { - continue; - } - //const Atom* atom0 = &ucell.atoms[T0]; - const int rx0=adjs.box[ad0].x; - const int ry0=adjs.box[ad0].y; - const int rz0=adjs.box[ad0].z; - key_tuple key1(iat1,-rx0,-ry0,-rz0); - key_tuple key2(iat2,rx2-rx0,ry2-ry0,rz2-rz0); - - std::unordered_map> *nlm_cur1_e; //left hand side, for energy - std::unordered_map>> *nlm_cur1_f; //lhs, for force - std::unordered_map> *nlm_cur2_e; //rhs, for energy - std::unordered_map>> *nlm_cur2_f; //rhs, for force - - if(!calc_deri) - { - nlm_cur1_e = &nlm_tot[iat][key1]; - nlm_cur2_e = &nlm_tot[iat][key2]; - } - else - { - nlm_cur1_f = &nlm_tot1[iat][key1]; - nlm_cur2_f = &nlm_tot1[iat][key2]; - } - - int nnr_inner = 0; - - for (int j=0; jnw*npol; j++) - { - const int j0 = j/npol;//added by zhengdy-soc - const int iw1_all = start1 + j; - const int mu = pv->global2local_row(iw1_all); - if(mu < 0)continue; - - // fix a serious bug: atom2[T2] -> atom2 - // mohan 2010-12-20 - for (int k=0; knw*npol; k++) - { - const int k0 = k/npol; - const int iw2_all = start2 + k; - const int nu = pv->global2local_col(iw2_all); - if(nu < 0)continue; - - if(!calc_deri) - { - std::vector nlm_1=(*nlm_cur1_e)[iw1_all]; - std::vector nlm_2=(*nlm_cur2_e)[iw2_all]; - if(nspin == 4) - { - std::complex nlm_tmp = ModuleBase::ZERO; - int is0 = (j-j0*npol) + (k-k0*npol)*2; - for (int no = 0; no < ucell.atoms[T0].ncpp.non_zero_count_soc[is0]; no++) - { - const int p1 = ucell.atoms[T0].ncpp.index1_soc[is0][no]; - const int p2 = ucell.atoms[T0].ncpp.index2_soc[is0][no]; - nlm_tmp += nlm_1[p1] * nlm_2[p2] * ucell.atoms[T0].ncpp.d_so(is0, p2, p1); - } - this->LM->Hloc_fixedR_soc[nnr+nnr_inner] += nlm_tmp; - } - else - { - double nlm_tmp = 0.0; - const int nproj = ucell.infoNL.nproj[T0]; - int ib = 0; - for (int nb = 0; nb < nproj; nb++) - { - const int L0 = ucell.infoNL.Beta[T0].Proj[nb].getL(); - for(int m=0;m<2*L0+1;m++) - { - if(nlm_1[ib]!=0.0 && nlm_2[ib]!=0.0) - { - nlm_tmp += nlm_1[ib]*nlm_2[ib]*ucell.atoms[T0].ncpp.dion(nb,nb); - } - ib+=1; - } - } - assert(ib==nlm_1.size()); - - if(gamma_only_local) - { - // mohan add 2010-12-20 - if( nlm_tmp!=0.0 ) - { - this->LM->set_HSgamma(iw1_all, iw2_all, nlm_tmp, NLloc);//N stands for nonlocal. - } - } - else - { - if( nlm_tmp!=0.0 ) - { - NLloc[nnr+nnr_inner] += nlm_tmp; - } - } - } - }// calc_deri - else // calculate the derivative - { - if (nspin == 4) - { - std::vector nlm_1 = (*nlm_cur2_f)[iw2_all][0]; - std::vector> nlm_2; - nlm_2.resize(3); - for (int i=0; i< 3; i++) - { - nlm_2[i] = (*nlm_cur1_f)[iw1_all][i+1]; - } - std::complex nlm[4][3] = {ModuleBase::ZERO}; - int is0 = (j-j0*npol) + (k-k0*npol)*2; - for (int no=0; no < ucell.atoms[T0].ncpp.non_zero_count_soc[is0]; no++) - { - const int p1 = ucell.atoms[T0].ncpp.index1_soc[is0][no]; - const int p2 = ucell.atoms[T0].ncpp.index2_soc[is0][no]; - if (is0 == 0) - { - this->LM->DHloc_fixedR_x[nnr+nnr_inner] += nlm_2[0][p1]*nlm_1[p2]* - (ucell.atoms[T0].ncpp.d_so(0, p2, p1).real() - + ucell.atoms[T0].ncpp.d_so(3, p2, p1).real())*0.5; - this->LM->DHloc_fixedR_y[nnr+nnr_inner] += nlm_2[1][p1]*nlm_1[p2]* - (ucell.atoms[T0].ncpp.d_so(0, p2, p1).real() - + ucell.atoms[T0].ncpp.d_so(3, p2, p1).real())*0.5; - this->LM->DHloc_fixedR_z[nnr+nnr_inner] += nlm_2[2][p1]*nlm_1[p2]* - (ucell.atoms[T0].ncpp.d_so(0, p2, p1).real() - + ucell.atoms[T0].ncpp.d_so(3, p2, p1).real())*0.5; - } - else if (is0 == 1) - { - this->LM->DHloc_fixedR_x[nnr+nnr_inner] += nlm_2[0][p1]*nlm_1[p2]* - (ucell.atoms[T0].ncpp.d_so(1, p2, p1).real() - + ucell.atoms[T0].ncpp.d_so(2, p2, p1).real())*0.5; - this->LM->DHloc_fixedR_y[nnr+nnr_inner] += nlm_2[1][p1]*nlm_1[p2]* - (ucell.atoms[T0].ncpp.d_so(1, p2, p1).real() - + ucell.atoms[T0].ncpp.d_so(2, p2, p1).real())*0.5; - this->LM->DHloc_fixedR_z[nnr+nnr_inner] += nlm_2[2][p1]*nlm_1[p2]* - (ucell.atoms[T0].ncpp.d_so(1, p2, p1).real() - + ucell.atoms[T0].ncpp.d_so(2, p2, p1).real())*0.5; - } - else if (is0 == 2) - { - this->LM->DHloc_fixedR_x[nnr+nnr_inner] += nlm_2[0][p1]*nlm_1[p2]* - (-ucell.atoms[T0].ncpp.d_so(1, p2, p1).imag() - + ucell.atoms[T0].ncpp.d_so(2, p2, p1).imag())*0.5; - this->LM->DHloc_fixedR_y[nnr+nnr_inner] += nlm_2[1][p1]*nlm_1[p2]* - (-ucell.atoms[T0].ncpp.d_so(1, p2, p1).imag() - + ucell.atoms[T0].ncpp.d_so(2, p2, p1).imag())*0.5; - this->LM->DHloc_fixedR_z[nnr+nnr_inner] += nlm_2[2][p1]*nlm_1[p2]* - (-ucell.atoms[T0].ncpp.d_so(1, p2, p1).imag() - + ucell.atoms[T0].ncpp.d_so(2, p2, p1).imag())*0.5; - } - else if (is0 == 3) - { - this->LM->DHloc_fixedR_x[nnr+nnr_inner] += nlm_2[0][p1]*nlm_1[p2]* - (ucell.atoms[T0].ncpp.d_so(0, p2, p1).real() - - ucell.atoms[T0].ncpp.d_so(3, p2, p1).real())*0.5; - this->LM->DHloc_fixedR_y[nnr+nnr_inner] += nlm_2[1][p1]*nlm_1[p2]* - (ucell.atoms[T0].ncpp.d_so(0, p2, p1).real() - - ucell.atoms[T0].ncpp.d_so(3, p2, p1).real())*0.5; - this->LM->DHloc_fixedR_z[nnr+nnr_inner] += nlm_2[2][p1]*nlm_1[p2]* - (ucell.atoms[T0].ncpp.d_so(0, p2, p1).real() - - ucell.atoms[T0].ncpp.d_so(3, p2, p1).real())*0.5; - } - } - } - else if (nspin == 1 || nspin == 2) - { - if(gamma_only_local) - { - double nlm[3]={0,0,0}; - - // sum all projectors for one atom. - std::vector nlm_1 = (*nlm_cur1_f)[iw1_all][0]; - std::vector> nlm_2; - nlm_2.resize(3); - for(int i=0;i<3;i++) - { - nlm_2[i] = (*nlm_cur2_f)[iw2_all][i+1]; - } - - assert(nlm_1.size()==nlm_2[0].size()); - - const int nproj = ucell.infoNL.nproj[T0]; - int ib = 0; - for (int nb = 0; nb < nproj; nb++) - { - const int L0 = ucell.infoNL.Beta[T0].Proj[nb].getL(); - for(int m=0;m<2*L0+1;m++) - { - for(int ir=0;ir<3;ir++) - { - nlm[ir] += nlm_2[ir][ib]*nlm_1[ib]*ucell.atoms[T0].ncpp.dion(nb,nb); - } - ib+=1; - } - } - assert(ib==nlm_1.size()); - - this->LM->set_force( - *this->LM->ParaV, - iw1_all, - iw2_all, - nlm[0], - nlm[1], - nlm[2], - 'N', - this->LM->DSloc_x, - this->LM->DSloc_y, - this->LM->DSloc_z, - this->LM->DHloc_fixed_x, - this->LM->DHloc_fixed_y, - this->LM->DHloc_fixed_z); - - } - else - { - // mohan change the order on 2011-06-17 - // origin: < psi1 | beta > < beta | dpsi2/dtau > - //now: < psi1/dtau | beta > < beta | psi2 > - double nlm[3]={0,0,0}; - - // sum all projectors for one atom. - std::vector nlm_1 = (*nlm_cur2_f)[iw2_all][0]; - std::vector> nlm_2; - nlm_2.resize(3); - for(int i=0;i<3;i++) - { - nlm_2[i] = (*nlm_cur1_f)[iw1_all][i+1]; - } - - assert(nlm_1.size()==nlm_2[0].size()); - - const int nproj = ucell.infoNL.nproj[T0]; - int ib = 0; - for (int nb = 0; nb < nproj; nb++) - { - const int L0 = ucell.infoNL.Beta[T0].Proj[nb].getL(); - for(int m=0;m<2*L0+1;m++) - { - for(int ir=0;ir<3;ir++) - { - nlm[ir] += nlm_2[ir][ib]*nlm_1[ib]*ucell.atoms[T0].ncpp.dion(nb,nb); - } - ib+=1; - } - } - assert(ib==nlm_1.size()); - - this->LM->DHloc_fixedR_x[nnr+nnr_inner] += nlm[0]; - this->LM->DHloc_fixedR_y[nnr+nnr_inner] += nlm[1]; - this->LM->DHloc_fixedR_z[nnr+nnr_inner] += nlm[2]; - } - } - else - { - ModuleBase::WARNING_QUIT("LCAO_gen_fixedH::build_Nonlocal_mu_new","nspin must be 1, 2 or 4"); - } - }//!calc_deri - nnr_inner++; - }// k - } // j - } // ad0 - - //outer circle : accumulate nnr - for (int j=0; jnw*npol; j++) - { - const int j0 = j/npol;//added by zhengdy-soc - const int iw1_all = start1 + j; - const int mu = pv->global2local_row(iw1_all); - if(mu < 0)continue; - - // fix a serious bug: atom2[T2] -> atom2 - // mohan 2010-12-20 - for (int k=0; knw*npol; k++) - { - const int k0 = k/npol; - const int iw2_all = start2 + k; - const int nu = pv->global2local_col(iw2_all); - if(nu < 0)continue; - total_nnr++; - nnr++; - } - } - }// end is_adj - } // ad2 - } // I1 - } // T1 -#ifdef _OPENMP -} -#endif - if(!gamma_only_local) - { - if( total_nnr!=pv->nnr) - { - GlobalV::ofs_running << " nr=" << total_nnr << std::endl; - GlobalV::ofs_running << " pv->nnr=" << pv->nnr << std::endl; - ModuleBase::WARNING_QUIT("LCAO_gen_fixedH::build_Nonlocal_mu_new","nnr!=LNNR.nnr"); - } - } - - ModuleBase::timer::tick ("LCAO_gen_fixedH","b_NL_mu_new"); - return; -} - -void LCAO_gen_fixedH::build_Nonlocal_beta_new(double* HSloc, - const UnitCell &ucell, - const LCAO_Orbitals& orb, - const ORB_gen_tables& uot, - Grid_Driver* GridD) //update by liuyu 2021-04-07 -{ - ModuleBase::TITLE("LCAO_gen_fixedH","b_NL_beta_new"); - ModuleBase::timer::tick ("LCAO_gen_fixedH","b_NL_beta_new"); - - const Parallel_Orbitals* pv = this->LM->ParaV; - const int npol = GlobalV::NPOL; - -#ifdef __MKL - const int mkl_threads = mkl_get_max_threads(); - mkl_set_num_threads(1); -#endif - - const std::vector adjs_all = GridD->get_adjs(ucell); - -#ifdef _OPENMP - #pragma omp parallel - { - double* Nonlocal_thread; - Nonlocal_thread = new double[pv->nloc]; - ModuleBase::GlobalFunc::ZEROS(Nonlocal_thread, pv->nloc); - #pragma omp for schedule(dynamic) -#endif - for(int iat=0; iat, where beta runs over L0,M0 on atom I0 - //and psi runs over atomic basis sets on the current core - //======================================================= - #ifdef _OPENMP - std::vector>> nlm_tot_thread; - nlm_tot_thread.resize(adjs_all[iat].adj_num + 1); - #else - std::vector>> nlm_tot; - nlm_tot.resize(adjs_all[iat].adj_num + 1); - #endif - - const ModuleBase::Vector3 tau0 = atom0->tau[I0]; - const double Rcut_Beta = ucell.infoNL.Beta[T0].get_rcut_max(); - - //outermost loop : all adjacent atoms - for(int ad_count=0; ad_count < adjs_all[iat].adj_num + 1; ad_count++) - { - const int T1 = adjs_all[iat].ntype[ad_count]; - const int I1 = adjs_all[iat].natom[ad_count]; - const int start1 = ucell.itiaiw2iwt(T1, I1, 0); - const double Rcut_AO1 = orb.Phi[T1].getRcut(); - const ModuleBase::Vector3 tau1 = adjs_all[iat].adjacent_tau[ad_count]; - const Atom* atom1 = &ucell.atoms[T1]; - const int nw1_tot = atom1->nw*npol; - - #ifdef _OPENMP - nlm_tot_thread[ad_count].clear(); - #else - nlm_tot[ad_count].clear(); - #endif - - //middle loop : atomic basis on current processor (either row or column) - const double dist1 = (tau1-tau0).norm() * ucell.lat0; - if (dist1 > Rcut_Beta + Rcut_AO1) - { - continue; - } - - for(int iw1=0; iw1global2local_row(iw1_all); - const int iw2_local = pv->global2local_col(iw1_all); - - if(iw1_local < 0 && iw2_local < 0) continue; - - const int iw1_0 = iw1/npol; - std::vector> nlm; - //2D, but first dimension is only 1 here - //for force, the right hand side is the gradient - //and the first dimension is then 3 - //inner loop : all projectors (L0,M0) - - -#ifdef USE_NEW_TWO_CENTER - //================================================================= - // new two-center integral (temporary) - //================================================================= - int L1 = atom1->iw2l[ iw1_0 ]; - int N1 = atom1->iw2n[ iw1_0 ]; - int m1 = atom1->iw2m[ iw1_0 ]; - - // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l) - int M1 = (m1 % 2 == 0) ? -m1/2 : (m1+1)/2; - - ModuleBase::Vector3 dtau = ucell.atoms[T0].tau[I0] - tau1; - uot.two_center_bundle->overlap_orb_beta->snap( - T1, L1, N1, M1, T0, dtau * ucell.lat0, false, nlm); -#else - uot.snap_psibeta_half( - orb, - ucell.infoNL, - nlm, tau1, T1, - atom1->iw2l[ iw1_0 ], // L1 - atom1->iw2m[ iw1_0 ], // m1 - atom1->iw2n[ iw1_0 ], // N1 - ucell.atoms[T0].tau[I0], T0, 0); //R0,T0 -#endif - //================================================================= - // end of new two-center integral (temporary) - //================================================================= - - #ifdef _OPENMP - nlm_tot_thread[ad_count].insert({iw1_all,nlm[0]}); - #else - nlm_tot[ad_count].insert({iw1_all,nlm[0]}); - #endif - }//end iw - }//end ad - - //======================================================= - //Step2: - //calculate sum_(L0,M0) beta - //and accumulate the value to Hloc_fixed(i,j) - //======================================================= - for(int ad1_count=0; ad1_count < adjs_all[iat].adj_num + 1; ad1_count++) - { - const int T1 = adjs_all[iat].ntype[ad1_count]; - const int I1 = adjs_all[iat].natom[ad1_count]; - const int start1 = ucell.itiaiw2iwt(T1, I1, 0); - const ModuleBase::Vector3 tau1 = adjs_all[iat].adjacent_tau[ad1_count]; - const Atom* atom1 = &ucell.atoms[T1]; - const int nw1_tot = atom1->nw*npol; - const double Rcut_AO1 = orb.Phi[T1].getRcut(); - - for (int ad2_count=0; ad2_count < adjs_all[iat].adj_num + 1; ad2_count++) - { - const int T2 = adjs_all[iat].ntype[ad2_count]; - const int I2 = adjs_all[iat].natom[ad2_count]; - const int start2 = ucell.itiaiw2iwt(T2, I2, 0); - const ModuleBase::Vector3 tau2 = adjs_all[iat].adjacent_tau[ad2_count]; - const Atom* atom2 = &ucell.atoms[T2]; - const int nw2_tot = atom2->nw*npol; - const double Rcut_AO2 = orb.Phi[T2].getRcut(); - const double dist1 = (tau1-tau0).norm() * ucell.lat0; - const double dist2 = (tau2-tau0).norm() * ucell.lat0; - - if (dist1 > Rcut_Beta + Rcut_AO1 - || dist2 > Rcut_Beta + Rcut_AO2) - { - continue; - } - - for(int iw1=0; iw1global2local_row(iw1_all); - if(iw1_local < 0) continue; - const int iw1_0 = iw1/npol; - for(int iw2=0; iw2global2local_col(iw2_all); - if(iw2_local < 0) continue; - const int iw2_0 = iw2/npol; - #ifdef _OPENMP - std::vector nlm1 = nlm_tot_thread[ad1_count][iw1_all]; - std::vector nlm2 = nlm_tot_thread[ad2_count][iw2_all]; - #else - std::vector nlm1 = nlm_tot[ad1_count][iw1_all]; - std::vector nlm2 = nlm_tot[ad2_count][iw2_all]; - #endif - - assert(nlm1.size()==nlm2.size()); - #ifdef _OPENMP - double nlm_thread=0.0; - #else - double nlm=0.0; - #endif - const int nproj = ucell.infoNL.nproj[T0]; - int ib = 0; - for(int nb = 0; nb < nproj; nb++) - { - const int L0 = ucell.infoNL.Beta[T0].Proj[nb].getL(); - for(int m=0;m<2*L0+1;m++) - { - #ifdef _OPENMP - nlm_thread += nlm1[ib]*nlm2[ib]*ucell.atoms[T0].ncpp.dion(nb,nb); - #else - nlm += nlm1[ib]*nlm2[ib]*ucell.atoms[T0].ncpp.dion(nb,nb); - #endif - ib+=1; - } - } - assert(ib==nlm1.size()); - - const int ir = pv->global2local_row(iw1_all); - const int ic = pv->global2local_col(iw2_all); - long index=0; - if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER()) - { - index=ic*pv->nrow+ir; - } - else - { - index=ir*pv->ncol+ic; - } -#ifdef _OPENMP - Nonlocal_thread[index] += nlm_thread; -#else - this->LM->set_HSgamma(iw1_all, iw2_all, nlm, HSloc); -#endif - }//iw2 - }//iw1 - }//ad2 - }//ad1 - }//end iat - - #ifdef _OPENMP - #pragma omp critical(cal_nonlocal) - { - for(int i=0; inloc; i++) - { - this->LM->Hloc_fixed[i] += Nonlocal_thread[i]; - } - } - delete[] Nonlocal_thread; - #endif -#ifdef _OPENMP - } -#endif - -#ifdef __MKL - mkl_set_num_threads(mkl_threads); -#endif - - ModuleBase::timer::tick ("LCAO_gen_fixedH","b_NL_beta_new"); - return; -} diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.h b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.h deleted file mode 100644 index 510cc79a60..0000000000 --- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.h +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef LCAO_gen_fixedH_H -#define LCAO_gen_fixedH_H - -#include "module_base/global_function.h" -#include "module_base/global_variable.h" -#include "module_basis/module_ao/ORB_gen_tables.h" -#include "module_cell/module_neighbor/sltk_grid_driver.h" -#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h" -#include "module_basis/module_ao/ORB_gen_tables.h" - -class LCAO_gen_fixedH -{ - - public: - LCAO_Matrix* LM; - - LCAO_gen_fixedH(); - ~LCAO_gen_fixedH(); - - //void calculate_NL_no(double* HlocR); - // void calculate_NL_no(std::complex* HlocR); - //void calculate_T_no(double* HlocR); - // void calculate_T_no(std::complex* HlocR); - //void calculate_S_no(double* SlocR); - // void calculate_S_no(std::complex* SlocR); - void build_ST_new(const char& dtype, - const bool& cal_deri, - const UnitCell& ucell, - const LCAO_Orbitals& orb, - const Parallel_Orbitals& pv, - const ORB_gen_tables& uot, - Grid_Driver* GridD, - double* SHlocR, - bool cal_syns = false, - double dmax = 0.0); - // cal_syns : calculate asynchronous overlap matrix for Hefei-NAMD - - // can used in gamma algorithm. - void build_Nonlocal_beta_new(double* Hloc, - const UnitCell& ucell, - const LCAO_Orbitals& orb, - const ORB_gen_tables& uot, - Grid_Driver* GridD); - - void build_Nonlocal_mu_new(double* HlocR, - const bool& calc_deri, - const UnitCell& ucell, - const LCAO_Orbitals& orb, - const ORB_gen_tables& uot, - Grid_Driver* GridD); -}; - -#endif diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.cpp index 03bd0ae40e..262ab295eb 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.cpp @@ -186,133 +186,6 @@ void LCAO_Matrix::set_HSk(const int& iw1_all, const int& iw2_all, const std::com return; } -void LCAO_Matrix::set_force -( - const Parallel_Orbitals &pv, - const int &iw1_all, - const int &iw2_all, - const double& vx, - const double& vy, - const double& vz, - const char &dtype, - double* dsloc_x, - double* dsloc_y, - double* dsloc_z, - double* dhloc_fixed_x, - double* dhloc_fixed_y, - double* dhloc_fixed_z) -{ - // use iw1_all and iw2_all to set Hloc - // becareful! The ir and ic may < 0!!!!!!!!!!!!!!!! - const int ir = pv.global2local_row(iw1_all); - const int ic = pv.global2local_col(iw2_all); - const long index = ir * pv.ncol + ic; - - if( index >= pv.nloc) - { - std::cout << " iw1_all = " << iw1_all << std::endl; - std::cout << " iw2_all = " << iw2_all << std::endl; - std::cout << " ir = " << ir << std::endl; - std::cout << " ic = " << ic << std::endl; - std::cout << " index = " << index << std::endl; - std::cout << " pv.nloc = " << pv.nloc << std::endl; - ModuleBase::WARNING_QUIT("LCAO_Matrix","set_force"); - } - - if (dtype == 'S') - { - dsloc_x[index] += vx; - dsloc_y[index] += vy; - dsloc_z[index] += vz; - } - else if (dtype == 'T') - { - // notice, the sign is '-', minus. - dhloc_fixed_x[index] -= vx; - dhloc_fixed_y[index] -= vy; - dhloc_fixed_z[index] -= vz; - } - else if (dtype == 'N') - { - dhloc_fixed_x[index] += vx; - dhloc_fixed_y[index] += vy; - dhloc_fixed_z[index] += vz; - } - - return; -} - -void LCAO_Matrix::set_stress -( - const Parallel_Orbitals &pv, - const int &iw1_all, - const int &iw2_all, - const double& vx, - const double& vy, - const double& vz, - const char &dtype, - const ModuleBase::Vector3 &dtau, - double* dsloc_11, - double* dsloc_12, - double* dsloc_13, - double* dsloc_22, - double* dsloc_23, - double* dsloc_33, - double* dhloc_fixed_11, - double* dhloc_fixed_12, - double* dhloc_fixed_13, - double* dhloc_fixed_22, - double* dhloc_fixed_23, - double* dhloc_fixed_33) -{ - // use iw1_all and iw2_all to set Hloc - // becareful! The ir and ic may < 0!!!!!!!!!!!!!!!! - const int ir = pv.global2local_row(iw1_all); - const int ic = pv.global2local_col(iw2_all); - const long index = ir * pv.ncol + ic; - - if( index >= pv.nloc) - { - std::cout << " iw1_all = " << iw1_all << std::endl; - std::cout << " iw2_all = " << iw2_all << std::endl; - std::cout << " ir = " << ir << std::endl; - std::cout << " ic = " << ic << std::endl; - std::cout << " index = " << index << std::endl; - std::cout << " pv.nloc = " << pv.nloc << std::endl; - ModuleBase::WARNING_QUIT("LCAO_Matrix","set_stress"); - } - - if (dtype == 'S') - { - dsloc_11[index] += vx * dtau.x; - dsloc_12[index] += vx * dtau.y; - dsloc_13[index] += vx * dtau.z; - dsloc_22[index] += vy * dtau.y; - dsloc_23[index] += vy * dtau.z; - dsloc_33[index] += vz * dtau.z; - } - else if (dtype == 'T') - { - // notice, the sign is '-', minus. - dhloc_fixed_11[index] -= vx * dtau.x; - dhloc_fixed_12[index] -= vx * dtau.y; - dhloc_fixed_13[index] -= vx * dtau.z; - dhloc_fixed_22[index] -= vy * dtau.y; - dhloc_fixed_23[index] -= vy * dtau.z; - dhloc_fixed_33[index] -= vz * dtau.z; - } - else if (dtype == 'N') - { - dhloc_fixed_11[index] += vx * dtau.x; - dhloc_fixed_12[index] += vx * dtau.y; - dhloc_fixed_13[index] += vx * dtau.z; - dhloc_fixed_22[index] += vy * dtau.y; - dhloc_fixed_23[index] += vy * dtau.z; - dhloc_fixed_33[index] += vz * dtau.z; - } - - return; -} void LCAO_Matrix::zeros_HSgamma(const char &mtype) { @@ -421,154 +294,6 @@ void LCAO_Matrix::zeros_HSR(const char &mtype) return; } -// Peize Lin add vtype='A' 2018-11-30 -void LCAO_Matrix::print_HSk(const char &mtype, const char &vtype, const double &accuracy, std::ostream &os) -{ - ModuleBase::TITLE("LCAO_Matrix","print_HSk"); - if(mtype=='S') os << "Sloc2 matrix" << std::endl; - else if(mtype=='T') os << "Hloc_fixed2 matrix" << std::endl; - else if(mtype=='H') os << "Hloc2 matrix" << std::endl; - else - { - ModuleBase::WARNING_QUIT("LCAO_Matrix::print_HSk","Check input parameter: mtype."); - } - - if(vtype=='C') os << " Output norm." << std::endl; - else if(vtype=='R') os << " Output real part." << std::endl; - else if(vtype=='I') os << " Output imag part." << std::endl; - else if(vtype=='A') os << " Output std::complex." << std::endl; - - - os << std::setprecision(8) << std::endl; - for(int i=0; iParaV->nrow; i++) - { - os << " " ; - for(int j=0; jParaV->ncol; j++) - { - const int index = i * this->ParaV->ncol + j; - if(vtype=='A') - { - std::complex v; - if(mtype=='S') v = Sloc2[index]; - else if(mtype=='T') v = Hloc_fixed2[index]; - else if(mtype=='H') v = Hloc2[index]; - auto threshold = [accuracy]( const double v ){ return std::abs(v)>accuracy ? v : 0.0; }; - os << '(' << threshold(v.real()) << ',' << threshold(v.imag()) << "\t"; - } - else - { - double v=-888.888;//wrong number - if(vtype=='R') - { - if(mtype=='S') v = Sloc2[index].real(); - else if(mtype=='T') v = Hloc_fixed2[index].real(); - else if(mtype=='H') v = Hloc2[index].real(); - } - else if(vtype=='C') - { - if(mtype=='S') v = sqrt( norm ( Sloc2[index] ) ); - else if(mtype=='T') v = sqrt( norm ( Hloc_fixed2[index] ) ); - else if(mtype=='H') v = sqrt( norm ( Hloc2[index] ) ); - } - else if(vtype=='I') - { - if(mtype=='S') v = Sloc2[index].imag(); - else if(mtype=='T') v = Hloc_fixed2[index].imag(); - else if(mtype=='H') v = Hloc2[index].imag(); - } - - if( std::abs(v) > accuracy ) - { - os << v << "\t"; - } - else - { - os << "0" << "\t"; - } - } - } - os << std::endl; - } - os << std::endl; - os << std::setprecision(6) << std::endl; - return; -} - - -void LCAO_Matrix::print_HSgamma(const char &mtype, std::ostream &os) -{ - ModuleBase::TITLE("LCAO_Matrix","print_HSgamma"); - - GlobalV::ofs_running << " " << mtype << " matrix" << std::endl; - GlobalV::ofs_running << " nrow=" << this->ParaV->nrow << std::endl; - GlobalV::ofs_running << " ncol=" << this->ParaV->ncol << std::endl; - GlobalV::ofs_running << " element number = " << this->ParaV->ncol << std::endl; - - if (mtype=='S') - { - os << std::setprecision(8); - os << " print Sloc" << std::endl; - for(int i=0; iParaV->ncol+j]; - if( std::abs(v) > 1.0e-8) - { - os << std::setw(15) << v; - } - else - { - os << std::setw(15) << "0"; - } - }//end j - os << std::endl; - }//end i - } - if (mtype=='T') - { - os << " print Hloc_fixed" << std::endl; - for(int i=0; iParaV->ncol+j]; - if( std::abs(v) > 1.0e-8) - { - os << std::setw(15) << v; - } - else - { - os << std::setw(15) << "0"; - } - }//end j - os << std::endl; - }//end i - } - if (mtype=='H') - { - os << " print Hloc" << std::endl; - for(int i=0; iParaV->ncol+j]; - if( std::abs(v) > 1.0e-8) - { - os << std::setw(15) << v; - } - else - { - os << std::setw(15) << "0"; - } - }//end j - os << std::endl; - }//end i - } - - return; -} - // becareful! Update Hloc, we add new members to it. void LCAO_Matrix::update_Hloc(void) { diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h index eb0c31f85c..4b509c2631 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h +++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h @@ -213,43 +213,6 @@ class LCAO_Matrix const char &dtype, const int spin = 0); - void set_force ( - const Parallel_Orbitals &pv, - const int& iw1_all, - const int& iw2_all, - const double& vx, - const double& vy, - const double& vz, - const char &dtype, - double* dsloc_x, - double* dsloc_y, - double* dsloc_z, - double* dhloc_fixed_x, - double* dhloc_fixed_y, - double* dhloc_fixed_z); - - void set_stress ( - const Parallel_Orbitals &pv, - const int& iw1_all, - const int& iw2_all, - const double& vx, - const double& vy, - const double& vz, - const char &dtype, - const ModuleBase::Vector3 &dtau, - double* dsloc_11, - double* dsloc_12, - double* dsloc_13, - double* dsloc_22, - double* dsloc_23, - double* dsloc_33, - double* dhloc_fixed_11, - double* dhloc_fixed_12, - double* dhloc_fixed_13, - double* dhloc_fixed_22, - double* dhloc_fixed_23, - double* dhloc_fixed_33); - void set_HR_tr( const int &Rx, const int &Ry, @@ -272,14 +235,6 @@ class LCAO_Matrix void zeros_HSR(const char &mtype); - void print_HSgamma(const char &mtype, std::ostream &os=std::cout); - - void print_HSk( - const char &mtype, - const char &vtype = 'C', - const double &accuracy = 1.0e-5, - std::ostream &os=std::cout); - void update_Hloc(void); void update_Hloc2(const int &ik); diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nl_beta.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nl_beta.cpp new file mode 100644 index 0000000000..6f94f92572 --- /dev/null +++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nl_beta.cpp @@ -0,0 +1,259 @@ +#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h" +#include "module_base/timer.h" + + +#ifdef __MKL +#include // mkl_get_max_threads +#endif + +namespace LCAO_domain +{ + +void build_Nonlocal_beta_new( + LCAO_Matrix &lm, + double* HSloc, + const UnitCell &ucell, + const LCAO_Orbitals& orb, + const ORB_gen_tables& uot, + Grid_Driver* GridD) //update by liuyu 2021-04-07 +{ + ModuleBase::TITLE("LCAO_domain","b_NL_beta_new"); + ModuleBase::timer::tick ("LCAO_domain","b_NL_beta_new"); + + const Parallel_Orbitals* pv = lm.ParaV; + const int npol = GlobalV::NPOL; + +#ifdef __MKL + const int mkl_threads = mkl_get_max_threads(); + mkl_set_num_threads(1); +#endif + + const std::vector adjs_all = GridD->get_adjs(ucell); + +#ifdef _OPENMP + #pragma omp parallel + { + double* Nonlocal_thread; + Nonlocal_thread = new double[pv->nloc]; + ModuleBase::GlobalFunc::ZEROS(Nonlocal_thread, pv->nloc); + #pragma omp for schedule(dynamic) +#endif + for(int iat=0; iat, where beta runs over L0,M0 on atom I0 + //and psi runs over atomic basis sets on the current core + //======================================================= + #ifdef _OPENMP + std::vector>> nlm_tot_thread; + nlm_tot_thread.resize(adjs_all[iat].adj_num + 1); + #else + std::vector>> nlm_tot; + nlm_tot.resize(adjs_all[iat].adj_num + 1); + #endif + + const ModuleBase::Vector3 tau0 = atom0->tau[I0]; + const double Rcut_Beta = ucell.infoNL.Beta[T0].get_rcut_max(); + + //outermost loop : all adjacent atoms + for(int ad_count=0; ad_count < adjs_all[iat].adj_num + 1; ad_count++) + { + const int T1 = adjs_all[iat].ntype[ad_count]; + const int I1 = adjs_all[iat].natom[ad_count]; + const int start1 = ucell.itiaiw2iwt(T1, I1, 0); + const double Rcut_AO1 = orb.Phi[T1].getRcut(); + const ModuleBase::Vector3 tau1 = adjs_all[iat].adjacent_tau[ad_count]; + const Atom* atom1 = &ucell.atoms[T1]; + const int nw1_tot = atom1->nw*npol; + + #ifdef _OPENMP + nlm_tot_thread[ad_count].clear(); + #else + nlm_tot[ad_count].clear(); + #endif + + //middle loop : atomic basis on current processor (either row or column) + const double dist1 = (tau1-tau0).norm() * ucell.lat0; + if (dist1 > Rcut_Beta + Rcut_AO1) + { + continue; + } + + for(int iw1=0; iw1global2local_row(iw1_all); + const int iw2_local = pv->global2local_col(iw1_all); + + if(iw1_local < 0 && iw2_local < 0) continue; + + const int iw1_0 = iw1/npol; + std::vector> nlm; + //2D, but first dimension is only 1 here + //for force, the right hand side is the gradient + //and the first dimension is then 3 + //inner loop : all projectors (L0,M0) + + +#ifdef USE_NEW_TWO_CENTER + //================================================================= + // new two-center integral (temporary) + //================================================================= + int L1 = atom1->iw2l[ iw1_0 ]; + int N1 = atom1->iw2n[ iw1_0 ]; + int m1 = atom1->iw2m[ iw1_0 ]; + + // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l) + int M1 = (m1 % 2 == 0) ? -m1/2 : (m1+1)/2; + + ModuleBase::Vector3 dtau = ucell.atoms[T0].tau[I0] - tau1; + uot.two_center_bundle->overlap_orb_beta->snap( + T1, L1, N1, M1, T0, dtau * ucell.lat0, false, nlm); +#else + uot.snap_psibeta_half( + orb, + ucell.infoNL, + nlm, tau1, T1, + atom1->iw2l[ iw1_0 ], // L1 + atom1->iw2m[ iw1_0 ], // m1 + atom1->iw2n[ iw1_0 ], // N1 + ucell.atoms[T0].tau[I0], T0, 0); //R0,T0 +#endif + //================================================================= + // end of new two-center integral (temporary) + //================================================================= + + #ifdef _OPENMP + nlm_tot_thread[ad_count].insert({iw1_all,nlm[0]}); + #else + nlm_tot[ad_count].insert({iw1_all,nlm[0]}); + #endif + }//end iw + }//end ad + + //======================================================= + //Step2: + //calculate sum_(L0,M0) beta + //and accumulate the value to Hloc_fixed(i,j) + //======================================================= + for(int ad1_count=0; ad1_count < adjs_all[iat].adj_num + 1; ad1_count++) + { + const int T1 = adjs_all[iat].ntype[ad1_count]; + const int I1 = adjs_all[iat].natom[ad1_count]; + const int start1 = ucell.itiaiw2iwt(T1, I1, 0); + const ModuleBase::Vector3 tau1 = adjs_all[iat].adjacent_tau[ad1_count]; + const Atom* atom1 = &ucell.atoms[T1]; + const int nw1_tot = atom1->nw*npol; + const double Rcut_AO1 = orb.Phi[T1].getRcut(); + + for (int ad2_count=0; ad2_count < adjs_all[iat].adj_num + 1; ad2_count++) + { + const int T2 = adjs_all[iat].ntype[ad2_count]; + const int I2 = adjs_all[iat].natom[ad2_count]; + const int start2 = ucell.itiaiw2iwt(T2, I2, 0); + const ModuleBase::Vector3 tau2 = adjs_all[iat].adjacent_tau[ad2_count]; + const Atom* atom2 = &ucell.atoms[T2]; + const int nw2_tot = atom2->nw*npol; + const double Rcut_AO2 = orb.Phi[T2].getRcut(); + const double dist1 = (tau1-tau0).norm() * ucell.lat0; + const double dist2 = (tau2-tau0).norm() * ucell.lat0; + + if (dist1 > Rcut_Beta + Rcut_AO1 + || dist2 > Rcut_Beta + Rcut_AO2) + { + continue; + } + + for(int iw1=0; iw1global2local_row(iw1_all); + if(iw1_local < 0) continue; + const int iw1_0 = iw1/npol; + for(int iw2=0; iw2global2local_col(iw2_all); + if(iw2_local < 0) continue; + const int iw2_0 = iw2/npol; + #ifdef _OPENMP + std::vector nlm1 = nlm_tot_thread[ad1_count][iw1_all]; + std::vector nlm2 = nlm_tot_thread[ad2_count][iw2_all]; + #else + std::vector nlm1 = nlm_tot[ad1_count][iw1_all]; + std::vector nlm2 = nlm_tot[ad2_count][iw2_all]; + #endif + + assert(nlm1.size()==nlm2.size()); + #ifdef _OPENMP + double nlm_thread=0.0; + #else + double nlm=0.0; + #endif + const int nproj = ucell.infoNL.nproj[T0]; + int ib = 0; + for(int nb = 0; nb < nproj; nb++) + { + const int L0 = ucell.infoNL.Beta[T0].Proj[nb].getL(); + for(int m=0;m<2*L0+1;m++) + { + #ifdef _OPENMP + nlm_thread += nlm1[ib]*nlm2[ib]*ucell.atoms[T0].ncpp.dion(nb,nb); + #else + nlm += nlm1[ib]*nlm2[ib]*ucell.atoms[T0].ncpp.dion(nb,nb); + #endif + ib+=1; + } + } + assert(ib==nlm1.size()); + + const int ir = pv->global2local_row(iw1_all); + const int ic = pv->global2local_col(iw2_all); + long index=0; + if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER()) + { + index=ic*pv->nrow+ir; + } + else + { + index=ir*pv->ncol+ic; + } +#ifdef _OPENMP + Nonlocal_thread[index] += nlm_thread; +#else + lm.set_HSgamma(iw1_all, iw2_all, nlm, HSloc); +#endif + }//iw2 + }//iw1 + }//ad2 + }//ad1 + }//end iat + + #ifdef _OPENMP + #pragma omp critical(cal_nonlocal) + { + for(int i=0; inloc; i++) + { + lm.Hloc_fixed[i] += Nonlocal_thread[i]; + } + } + delete[] Nonlocal_thread; + #endif +#ifdef _OPENMP + } +#endif + +#ifdef __MKL + mkl_set_num_threads(mkl_threads); +#endif + + ModuleBase::timer::tick ("LCAO_domain","b_NL_beta_new"); + return; +} + +} diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nl_mu.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nl_mu.cpp new file mode 100644 index 0000000000..537c648ee5 --- /dev/null +++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nl_mu.cpp @@ -0,0 +1,602 @@ +#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h" +#include "module_base/timer.h" + +namespace LCAO_domain +{ + + +typedef std::tuple key_tuple; + +#include "record_adj.h" //mohan add 2012-07-06 + +void build_Nonlocal_mu_new( + LCAO_Matrix &lm, + double* NLloc, + const bool &calc_deri, + const UnitCell &ucell, + const LCAO_Orbitals &orb, + const ORB_gen_tables &uot, + Grid_Driver* GridD) +{ + ModuleBase::TITLE("LCAO_domain","vnl_mu_new"); + ModuleBase::timer::tick("LCAO_domain", "vnl_mu_new"); + const Parallel_Orbitals* pv = lm.ParaV; + + const int nspin = GlobalV::NSPIN; + const int npol = GlobalV::NPOL; + const bool gamma_only_local = GlobalV::GAMMA_ONLY_LOCAL; + + // < phi1 | beta > < beta | phi2 > + // phi1 is within the unitcell. + // while beta is in the supercell. + // while phi2 is in the supercell. + + + //Step 1 : generate + + //This is the data structure for storing + //It is a 4 layer data structure + //The outmost layer is std::vector with size being number of atoms in unit cell + //The second layer is a map, the key being a combination of 4 number (iat, dRx, dRy, dRz) + //which identifies a unique adjacent atom of the first atom + //The third layer is an unordered map, with key being the index of atomic basis |psi> + //The inner layer is a vector, each element representing a projector |beta> + //It then either stores the number (nlm_tot) + //or a vector of 4, storing additionally (nlm_tot1) x_i=x,y,z + std::vector>>> nlm_tot; + std::vector>>>> nlm_tot1; + + if(!calc_deri) + { + nlm_tot.resize(ucell.nat); + } + else + { + nlm_tot1.resize(ucell.nat); + } +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) +#endif + for(int iat=0;iat tau = ucell.atoms[it].tau[ia]; + AdjacentAtomInfo adjs; + GridD->Find_atom(ucell, tau ,it, ia, &adjs); + + if(!calc_deri) + { + nlm_tot[iat].clear(); + } + else + { + nlm_tot1[iat].clear(); + } + + for (int ad=0; ad &tau1 = adjs.adjacent_tau[ad]; + const Atom* atom1 = &ucell.atoms[T1]; + const int nw1_tot = atom1->nw*npol; + + const ModuleBase::Vector3 dtau = tau1-tau; + const double dist1 = dtau.norm2() * pow(ucell.lat0,2); + + if (dist1 > pow(Rcut_Beta + Rcut_AO1,2)) + { + continue; + } + std::unordered_map> nlm_cur; + std::unordered_map>> nlm_cur1; + + if(!calc_deri) + { + nlm_cur.clear(); + } + else + { + nlm_cur1.clear(); + } + for (int iw1=0; iw1global2local_row(iw1_all); + const int iw2_local = pv->global2local_col(iw1_all); + if(iw1_local < 0 && iw2_local < 0)continue; + const int iw1_0 = iw1/npol; + std::vector> nlm; + //nlm is a vector of vectors, but size of outer vector is only 1 here + //If we are calculating force, we need also to store the gradient + //and size of outer vector is then 4 + //inner loop : all projectors (L0,M0) + +#ifdef USE_NEW_TWO_CENTER + //================================================================= + // new two-center integral (temporary) + //================================================================= + int L1 = atom1->iw2l[ iw1_0 ]; + int N1 = atom1->iw2n[ iw1_0 ]; + int m1 = atom1->iw2m[ iw1_0 ]; + + // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l) + int M1 = (m1 % 2 == 0) ? -m1/2 : (m1+1)/2; + + ModuleBase::Vector3 dtau = tau - tau1; + uot.two_center_bundle->overlap_orb_beta->snap( + T1, L1, N1, M1, it, dtau * ucell.lat0, calc_deri, nlm); +#else + uot.snap_psibeta_half( + orb, + ucell.infoNL, + nlm, tau1, T1, + atom1->iw2l[ iw1_0 ], // L1 + atom1->iw2m[ iw1_0 ], // m1 + atom1->iw2n[ iw1_0 ], // N1 + tau, it, calc_deri); //R0,T0 +#endif + //================================================================= + // end of new two-center integral (temporary) + //================================================================= + + if(!calc_deri) + { + nlm_cur.insert({iw1_all,nlm[0]}); + } + else + { + nlm_cur1.insert({iw1_all,nlm}); + } + }//end iw + + const int iat1=ucell.itia2iat(T1, I1); + const int rx1=adjs.box[ad].x; + const int ry1=adjs.box[ad].y; + const int rz1=adjs.box[ad].z; + key_tuple key_1(iat1,rx1,ry1,rz1); + + if(!calc_deri) + { + nlm_tot[iat][key_1]=nlm_cur; + } + else + { + nlm_tot1[iat][key_1]=nlm_cur1; + } + }//end ad + } + + + //======================================================= + //Step2: + //calculate sum_(L0,M0) beta + //and accumulate the value to Hloc_fixedR(i,j) + //======================================================= + int total_nnr = 0; +#ifdef _OPENMP +#pragma omp parallel reduction(+:total_nnr) +{ +#endif + ModuleBase::Vector3 tau1, tau2, dtau; + ModuleBase::Vector3 dtau1, dtau2, tau0; + double distance = 0.0; + double rcut = 0.0; + double rcut1 = 0.0; + double rcut2 = 0.0; + + // Record_adj RA; + // RA.for_2d(); + + // psi1 +#ifdef _OPENMP +// use schedule(dynamic) for load balancing because adj_num is various +#pragma omp for schedule(dynamic) +#endif + for (int iat1 = 0; iat1 < ucell.nat; iat1++) + { + const int T1 = ucell.iat2it[iat1]; + const Atom* atom1 = &ucell.atoms[T1]; + const int I1 = ucell.iat2ia[iat1]; + { + //GridD->Find_atom( atom1->tau[I1] ); + AdjacentAtomInfo adjs; + GridD->Find_atom(ucell, atom1->tau[I1] ,T1, I1, &adjs); + const int start1 = ucell.itiaiw2iwt(T1, I1, 0); + // Record_adj.for_2d() may not called in some case + int nnr = pv->nlocstart ? pv->nlocstart[iat1] : 0; + tau1 = atom1->tau[I1]; + + // psi2 + for (int ad2=0; ad2= rcut) + { + for (int ad0 = 0; ad0 < adjs.adj_num+1; ++ad0) + { + const int T0 = adjs.ntype[ad0]; + + tau0 = adjs.adjacent_tau[ad0]; + dtau1 = tau0 - tau1; + dtau2 = tau0 - tau2; + + const double distance1 = dtau1.norm2() * pow(ucell.lat0,2); + const double distance2 = dtau2.norm2() * pow(ucell.lat0,2); + + rcut1 = pow(orb.Phi[T1].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max(),2); + rcut2 = pow(orb.Phi[T2].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max(),2); + + if( distance1 < rcut1 && distance2 < rcut2 ) + { + is_adj = true; + break; + } + } + } + + + if(is_adj) + { + // < psi1 | all projectors | psi2 > + // ----------------------------- enter the nnr increaing zone ------------------------- + for (int ad0=0; ad0 < adjs.adj_num+1 ; ++ad0) + { + const int T0 = adjs.ntype[ad0]; + const int I0 = adjs.natom[ad0]; + const int iat = ucell.itia2iat(T0,I0); + + // mohan add 2010-12-19 + if( ucell.infoNL.nproj[T0] == 0) + { + continue; + } + + tau0 = adjs.adjacent_tau[ad0]; + + dtau1 = tau0 - tau1; + dtau2 = tau0 - tau2; + const double distance1 = dtau1.norm2() * pow(ucell.lat0,2); + const double distance2 = dtau2.norm2() * pow(ucell.lat0,2); + + // seems a bug here!! mohan 2011-06-17 + rcut1 = pow(orb.Phi[T1].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max(),2); + rcut2 = pow(orb.Phi[T2].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max(),2); + + if(distance1 >= rcut1 || distance2 >= rcut2) + { + continue; + } + //const Atom* atom0 = &ucell.atoms[T0]; + const int rx0=adjs.box[ad0].x; + const int ry0=adjs.box[ad0].y; + const int rz0=adjs.box[ad0].z; + key_tuple key1(iat1,-rx0,-ry0,-rz0); + key_tuple key2(iat2,rx2-rx0,ry2-ry0,rz2-rz0); + + std::unordered_map> *nlm_cur1_e; //left hand side, for energy + std::unordered_map>> *nlm_cur1_f; //lhs, for force + std::unordered_map> *nlm_cur2_e; //rhs, for energy + std::unordered_map>> *nlm_cur2_f; //rhs, for force + + if(!calc_deri) + { + nlm_cur1_e = &nlm_tot[iat][key1]; + nlm_cur2_e = &nlm_tot[iat][key2]; + } + else + { + nlm_cur1_f = &nlm_tot1[iat][key1]; + nlm_cur2_f = &nlm_tot1[iat][key2]; + } + + int nnr_inner = 0; + + for (int j=0; jnw*npol; j++) + { + const int j0 = j/npol;//added by zhengdy-soc + const int iw1_all = start1 + j; + const int mu = pv->global2local_row(iw1_all); + if(mu < 0)continue; + + // fix a serious bug: atom2[T2] -> atom2 + // mohan 2010-12-20 + for (int k=0; knw*npol; k++) + { + const int k0 = k/npol; + const int iw2_all = start2 + k; + const int nu = pv->global2local_col(iw2_all); + if(nu < 0)continue; + + if(!calc_deri) + { + std::vector nlm_1=(*nlm_cur1_e)[iw1_all]; + std::vector nlm_2=(*nlm_cur2_e)[iw2_all]; + if(nspin == 4) + { + std::complex nlm_tmp = ModuleBase::ZERO; + int is0 = (j-j0*npol) + (k-k0*npol)*2; + for (int no = 0; no < ucell.atoms[T0].ncpp.non_zero_count_soc[is0]; no++) + { + const int p1 = ucell.atoms[T0].ncpp.index1_soc[is0][no]; + const int p2 = ucell.atoms[T0].ncpp.index2_soc[is0][no]; + nlm_tmp += nlm_1[p1] * nlm_2[p2] * ucell.atoms[T0].ncpp.d_so(is0, p2, p1); + } + lm.Hloc_fixedR_soc[nnr+nnr_inner] += nlm_tmp; + } + else if(nspin == 2 || nspin == 1) + { + double nlm_tmp = 0.0; + const int nproj = ucell.infoNL.nproj[T0]; + int ib = 0; + for (int nb = 0; nb < nproj; nb++) + { + const int L0 = ucell.infoNL.Beta[T0].Proj[nb].getL(); + for(int m=0;m<2*L0+1;m++) + { + if(nlm_1[ib]!=0.0 && nlm_2[ib]!=0.0) + { + nlm_tmp += nlm_1[ib]*nlm_2[ib]*ucell.atoms[T0].ncpp.dion(nb,nb); + } + ib+=1; + } + } + assert(ib==nlm_1.size()); + + if(gamma_only_local) + { + // mohan add 2010-12-20 + if( nlm_tmp!=0.0 ) + { + lm.set_HSgamma(iw1_all, iw2_all, nlm_tmp, NLloc);//N stands for nonlocal. + } + } + else + { + if( nlm_tmp!=0.0 ) + { + NLloc[nnr+nnr_inner] += nlm_tmp; + } + } + }// end nspin + }// calc_deri + else // calculate the derivative + { + if (nspin == 4) + { + std::vector nlm_1 = (*nlm_cur2_f)[iw2_all][0]; + std::vector> nlm_2; + nlm_2.resize(3); + for (int i=0; i< 3; i++) + { + nlm_2[i] = (*nlm_cur1_f)[iw1_all][i+1]; + } + std::complex nlm[4][3] = {ModuleBase::ZERO}; + int is0 = (j-j0*npol) + (k-k0*npol)*2; + for (int no=0; no < ucell.atoms[T0].ncpp.non_zero_count_soc[is0]; no++) + { + const int p1 = ucell.atoms[T0].ncpp.index1_soc[is0][no]; + const int p2 = ucell.atoms[T0].ncpp.index2_soc[is0][no]; + if (is0 == 0) + { + lm.DHloc_fixedR_x[nnr+nnr_inner] += nlm_2[0][p1]*nlm_1[p2]* + (ucell.atoms[T0].ncpp.d_so(0, p2, p1).real() + + ucell.atoms[T0].ncpp.d_so(3, p2, p1).real())*0.5; + lm.DHloc_fixedR_y[nnr+nnr_inner] += nlm_2[1][p1]*nlm_1[p2]* + (ucell.atoms[T0].ncpp.d_so(0, p2, p1).real() + + ucell.atoms[T0].ncpp.d_so(3, p2, p1).real())*0.5; + lm.DHloc_fixedR_z[nnr+nnr_inner] += nlm_2[2][p1]*nlm_1[p2]* + (ucell.atoms[T0].ncpp.d_so(0, p2, p1).real() + + ucell.atoms[T0].ncpp.d_so(3, p2, p1).real())*0.5; + } + else if (is0 == 1) + { + lm.DHloc_fixedR_x[nnr+nnr_inner] += nlm_2[0][p1]*nlm_1[p2]* + (ucell.atoms[T0].ncpp.d_so(1, p2, p1).real() + + ucell.atoms[T0].ncpp.d_so(2, p2, p1).real())*0.5; + lm.DHloc_fixedR_y[nnr+nnr_inner] += nlm_2[1][p1]*nlm_1[p2]* + (ucell.atoms[T0].ncpp.d_so(1, p2, p1).real() + + ucell.atoms[T0].ncpp.d_so(2, p2, p1).real())*0.5; + lm.DHloc_fixedR_z[nnr+nnr_inner] += nlm_2[2][p1]*nlm_1[p2]* + (ucell.atoms[T0].ncpp.d_so(1, p2, p1).real() + + ucell.atoms[T0].ncpp.d_so(2, p2, p1).real())*0.5; + } + else if (is0 == 2) + { + lm.DHloc_fixedR_x[nnr+nnr_inner] += nlm_2[0][p1]*nlm_1[p2]* + (-ucell.atoms[T0].ncpp.d_so(1, p2, p1).imag() + + ucell.atoms[T0].ncpp.d_so(2, p2, p1).imag())*0.5; + lm.DHloc_fixedR_y[nnr+nnr_inner] += nlm_2[1][p1]*nlm_1[p2]* + (-ucell.atoms[T0].ncpp.d_so(1, p2, p1).imag() + + ucell.atoms[T0].ncpp.d_so(2, p2, p1).imag())*0.5; + lm.DHloc_fixedR_z[nnr+nnr_inner] += nlm_2[2][p1]*nlm_1[p2]* + (-ucell.atoms[T0].ncpp.d_so(1, p2, p1).imag() + + ucell.atoms[T0].ncpp.d_so(2, p2, p1).imag())*0.5; + } + else if (is0 == 3) + { + lm.DHloc_fixedR_x[nnr+nnr_inner] += nlm_2[0][p1]*nlm_1[p2]* + (ucell.atoms[T0].ncpp.d_so(0, p2, p1).real() + - ucell.atoms[T0].ncpp.d_so(3, p2, p1).real())*0.5; + lm.DHloc_fixedR_y[nnr+nnr_inner] += nlm_2[1][p1]*nlm_1[p2]* + (ucell.atoms[T0].ncpp.d_so(0, p2, p1).real() + - ucell.atoms[T0].ncpp.d_so(3, p2, p1).real())*0.5; + lm.DHloc_fixedR_z[nnr+nnr_inner] += nlm_2[2][p1]*nlm_1[p2]* + (ucell.atoms[T0].ncpp.d_so(0, p2, p1).real() + - ucell.atoms[T0].ncpp.d_so(3, p2, p1).real())*0.5; + } + } + } + else if (nspin == 1 || nspin == 2) + { + if(gamma_only_local) + { + double nlm[3]={0,0,0}; + + // sum all projectors for one atom. + std::vector nlm_1 = (*nlm_cur1_f)[iw1_all][0]; + std::vector> nlm_2; + nlm_2.resize(3); + for(int i=0;i<3;i++) + { + nlm_2[i] = (*nlm_cur2_f)[iw2_all][i+1]; + } + + assert(nlm_1.size()==nlm_2[0].size()); + + const int nproj = ucell.infoNL.nproj[T0]; + int ib = 0; + for (int nb = 0; nb < nproj; nb++) + { + const int L0 = ucell.infoNL.Beta[T0].Proj[nb].getL(); + for(int m=0;m<2*L0+1;m++) + { + for(int ir=0;ir<3;ir++) + { + nlm[ir] += nlm_2[ir][ib]*nlm_1[ib]*ucell.atoms[T0].ncpp.dion(nb,nb); + } + ib+=1; + } + } + assert(ib==nlm_1.size()); + + LCAO_domain::set_force( + *lm.ParaV, + iw1_all, + iw2_all, + nlm[0], + nlm[1], + nlm[2], + 'N', + lm.DSloc_x, + lm.DSloc_y, + lm.DSloc_z, + lm.DHloc_fixed_x, + lm.DHloc_fixed_y, + lm.DHloc_fixed_z); + + } + else + { + // mohan change the order on 2011-06-17 + // origin: < psi1 | beta > < beta | dpsi2/dtau > + //now: < psi1/dtau | beta > < beta | psi2 > + double nlm[3]={0,0,0}; + + // sum all projectors for one atom. + std::vector nlm_1 = (*nlm_cur2_f)[iw2_all][0]; + std::vector> nlm_2; + nlm_2.resize(3); + for(int i=0;i<3;i++) + { + nlm_2[i] = (*nlm_cur1_f)[iw1_all][i+1]; + } + + assert(nlm_1.size()==nlm_2[0].size()); + + const int nproj = ucell.infoNL.nproj[T0]; + int ib = 0; + for (int nb = 0; nb < nproj; nb++) + { + const int L0 = ucell.infoNL.Beta[T0].Proj[nb].getL(); + for(int m=0;m<2*L0+1;m++) + { + for(int ir=0;ir<3;ir++) + { + nlm[ir] += nlm_2[ir][ib]*nlm_1[ib]*ucell.atoms[T0].ncpp.dion(nb,nb); + } + ib+=1; + } + } + assert(ib==nlm_1.size()); + + lm.DHloc_fixedR_x[nnr+nnr_inner] += nlm[0]; + lm.DHloc_fixedR_y[nnr+nnr_inner] += nlm[1]; + lm.DHloc_fixedR_z[nnr+nnr_inner] += nlm[2]; + } + } + else + { + ModuleBase::WARNING_QUIT("LCAO_domain::build_Nonlocal_mu_new","nspin must be 1, 2 or 4"); + } + }//!calc_deri + nnr_inner++; + }// k + } // j + } // ad0 + + //outer circle : accumulate nnr + for (int j=0; jnw*npol; j++) + { + const int j0 = j/npol;//added by zhengdy-soc + const int iw1_all = start1 + j; + const int mu = pv->global2local_row(iw1_all); + if(mu < 0) + { + continue; + } + + // fix a serious bug: atom2[T2] -> atom2 + // mohan 2010-12-20 + for (int k=0; knw*npol; k++) + { + const int k0 = k/npol; + const int iw2_all = start2 + k; + const int nu = pv->global2local_col(iw2_all); + if(nu < 0) + { + continue; + } + total_nnr++; + nnr++; + } + } + }// end is_adj + } // ad2 + } // I1 + } // T1 +#ifdef _OPENMP +} +#endif + if(!gamma_only_local) + { + if( total_nnr!=pv->nnr) + { + GlobalV::ofs_running << " nr=" << total_nnr << std::endl; + GlobalV::ofs_running << " pv->nnr=" << pv->nnr << std::endl; + ModuleBase::WARNING_QUIT("LCAO_domain::build_Nonlocal_mu_new","nnr!=LNNR.nnr"); + } + } + + ModuleBase::timer::tick("LCAO_domain", "vnl_mu_new"); + return; +} + + +} diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_set_fs.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_set_fs.cpp new file mode 100644 index 0000000000..e7ec844448 --- /dev/null +++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_set_fs.cpp @@ -0,0 +1,134 @@ +#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h" + +namespace LCAO_domain +{ + +void set_force +( + const Parallel_Orbitals &pv, + const int &iw1_all, + const int &iw2_all, + const double& vx, + const double& vy, + const double& vz, + const char &dtype, + double* dsloc_x, + double* dsloc_y, + double* dsloc_z, + double* dhloc_fixed_x, + double* dhloc_fixed_y, + double* dhloc_fixed_z) +{ + // use iw1_all and iw2_all to set Hloc + // becareful! The ir and ic may < 0!!!!!!!!!!!!!!!! + const int ir = pv.global2local_row(iw1_all); + const int ic = pv.global2local_col(iw2_all); + const long index = ir * pv.ncol + ic; + + if( index >= pv.nloc) + { + std::cout << " iw1_all = " << iw1_all << std::endl; + std::cout << " iw2_all = " << iw2_all << std::endl; + std::cout << " ir = " << ir << std::endl; + std::cout << " ic = " << ic << std::endl; + std::cout << " index = " << index << std::endl; + std::cout << " pv.nloc = " << pv.nloc << std::endl; + ModuleBase::WARNING_QUIT("LCAO_Matrix","set_force"); + } + + if (dtype == 'S') + { + dsloc_x[index] += vx; + dsloc_y[index] += vy; + dsloc_z[index] += vz; + } + else if (dtype == 'T') + { + // notice, the sign is '-', minus. + dhloc_fixed_x[index] -= vx; + dhloc_fixed_y[index] -= vy; + dhloc_fixed_z[index] -= vz; + } + else if (dtype == 'N') + { + dhloc_fixed_x[index] += vx; + dhloc_fixed_y[index] += vy; + dhloc_fixed_z[index] += vz; + } + + return; +} + +void set_stress +( + const Parallel_Orbitals &pv, + const int &iw1_all, + const int &iw2_all, + const double& vx, + const double& vy, + const double& vz, + const char &dtype, + const ModuleBase::Vector3 &dtau, + double* dsloc_11, + double* dsloc_12, + double* dsloc_13, + double* dsloc_22, + double* dsloc_23, + double* dsloc_33, + double* dhloc_fixed_11, + double* dhloc_fixed_12, + double* dhloc_fixed_13, + double* dhloc_fixed_22, + double* dhloc_fixed_23, + double* dhloc_fixed_33) +{ + // use iw1_all and iw2_all to set Hloc + // becareful! The ir and ic may < 0!!!!!!!!!!!!!!!! + const int ir = pv.global2local_row(iw1_all); + const int ic = pv.global2local_col(iw2_all); + const long index = ir * pv.ncol + ic; + + if( index >= pv.nloc) + { + std::cout << " iw1_all = " << iw1_all << std::endl; + std::cout << " iw2_all = " << iw2_all << std::endl; + std::cout << " ir = " << ir << std::endl; + std::cout << " ic = " << ic << std::endl; + std::cout << " index = " << index << std::endl; + std::cout << " pv.nloc = " << pv.nloc << std::endl; + ModuleBase::WARNING_QUIT("LCAO_Matrix","set_stress"); + } + + if (dtype == 'S') + { + dsloc_11[index] += vx * dtau.x; + dsloc_12[index] += vx * dtau.y; + dsloc_13[index] += vx * dtau.z; + dsloc_22[index] += vy * dtau.y; + dsloc_23[index] += vy * dtau.z; + dsloc_33[index] += vz * dtau.z; + } + else if (dtype == 'T') + { + // notice, the sign is '-', minus. + dhloc_fixed_11[index] -= vx * dtau.x; + dhloc_fixed_12[index] -= vx * dtau.y; + dhloc_fixed_13[index] -= vx * dtau.z; + dhloc_fixed_22[index] -= vy * dtau.y; + dhloc_fixed_23[index] -= vy * dtau.z; + dhloc_fixed_33[index] -= vz * dtau.z; + } + else if (dtype == 'N') + { + dhloc_fixed_11[index] += vx * dtau.x; + dhloc_fixed_12[index] += vx * dtau.y; + dhloc_fixed_13[index] += vx * dtau.z; + dhloc_fixed_22[index] += vy * dtau.y; + dhloc_fixed_23[index] += vy * dtau.z; + dhloc_fixed_33[index] += vz * dtau.z; + } + + return; +} + +} diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_set_st.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_set_st.cpp new file mode 100644 index 0000000000..873c8099ec --- /dev/null +++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_set_st.cpp @@ -0,0 +1,624 @@ +#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h" +#include "module_base/timer.h" +#include "module_hamilt_pw/hamilt_pwdft/global.h" // only for INPUT + +namespace LCAO_domain +{ + +void single_derivative( + LCAO_Matrix& lm, + const LCAO_Orbitals& orb, + const ORB_gen_tables& uot, + const Parallel_Orbitals& pv, + const UnitCell& ucell, + const int nspin, + const bool cal_stress, + const int iw1_all, + const int iw2_all, + const int m1, + const int m2, + const char &dtype, + const int T1, + const int L1, + const int N1, + const int T2, + const int L2, + const int N2, + const ModuleBase::Vector3 &dtau, + const ModuleBase::Vector3 &tau1, + const ModuleBase::Vector3 &tau2, + const int npol, + const int jj, + const int jj0, + const int kk, + const int kk0, + int& nnr, + int& total_nnr, + double *olm // output value +) +{ + + const bool gamma_only_local = GlobalV::GAMMA_ONLY_LOCAL; + +#ifdef USE_NEW_TWO_CENTER + //================================================================= + // new two-center integral (temporary) + //================================================================= + // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l) + const int M1 = (m1 % 2 == 0) ? -m1/2 : (m1+1)/2; + const int M2 = (m2 % 2 == 0) ? -m2/2 : (m2+1)/2; + switch (dtype) + { + case 'S': + uot.two_center_bundle->overlap_orb->calculate( + T1, + L1, + N1, + M1, + T2, + L2, + N2, + M2, + dtau * ucell.lat0, + nullptr, + olm); + break; + case 'T': + uot.two_center_bundle->kinetic_orb->calculate( + T1, + L1, + N1, + M1, + T2, + L2, + N2, + M2, + dtau * ucell.lat0, + nullptr, + olm); + break; + default: // not supposed to happen + ModuleBase::WARNING_QUIT("LCAO_domain::build_ST_new","dtype must be S or T"); + } +#else + uot.snap_psipsi( orb, olm, 1, dtype, + tau1, T1, L1, m1, N1, + tau2, T2, L2, m2, N2 + ); +#endif + + //================================================================= + // end of new two-center integral (temporary) + //================================================================= + + // condition 7: gamma only or multiple k + if(gamma_only_local) + { + LCAO_domain::set_force( + pv, + iw1_all, + iw2_all, + olm[0], + olm[1], + olm[2], + dtype, + lm.DSloc_x, + lm.DSloc_y, + lm.DSloc_z, + lm.DHloc_fixed_x, + lm.DHloc_fixed_y, + lm.DHloc_fixed_z); + + if(cal_stress) + { + LCAO_domain::set_stress( + pv, + iw1_all, + iw2_all, + olm[0], + olm[1], + olm[2], + dtype, + dtau, + lm.DSloc_11, + lm.DSloc_12, + lm.DSloc_13, + lm.DSloc_22, + lm.DSloc_23, + lm.DSloc_33, + lm.DHloc_fixed_11, + lm.DHloc_fixed_12, + lm.DHloc_fixed_13, + lm.DHloc_fixed_22, + lm.DHloc_fixed_23, + lm.DHloc_fixed_33); + }// end stress + }// end gamma_only + else // condition 7, multiple k-points algorithm + { + // condition 8, S or T + if(dtype=='S') + { + // condition 9, nspin + if (nspin == 1 || nspin ==2) + { + lm.DSloc_Rx[nnr] = olm[0]; + lm.DSloc_Ry[nnr] = olm[1]; + lm.DSloc_Rz[nnr] = olm[2]; + } + else if (nspin == 4) + { + int is = (jj-jj0*npol) + (kk-kk0*npol)*2; + if (is == 0) // is==3 is not needed in force calculation + { + lm.DSloc_Rx[nnr] = olm[0]; + lm.DSloc_Ry[nnr] = olm[1]; + lm.DSloc_Rz[nnr] = olm[2]; + } + else + { + lm.DSloc_Rx[nnr] = 0.0; + lm.DSloc_Ry[nnr] = 0.0; + lm.DSloc_Rz[nnr] = 0.0; + } + } + else + { + ModuleBase::WARNING_QUIT("LCAO_domain::build_ST_new","nspin must be 1, 2 or 4"); + }// end condition 9, nspin + + if(cal_stress) + { + lm.DH_r[nnr*3] = dtau.x; + lm.DH_r[nnr*3 + 1] = dtau.y; + lm.DH_r[nnr*3 + 2] = dtau.z; + } + } + else if(dtype=='T') // condition 8, S or T + { + // condtion 9, nspin + if (nspin == 1 || nspin ==2) + { + lm.DHloc_fixedR_x[nnr] = olm[0]; + lm.DHloc_fixedR_y[nnr] = olm[1]; + lm.DHloc_fixedR_z[nnr] = olm[2]; + if(cal_stress) + { + lm.stvnl11[nnr] = olm[0] * dtau.x; + lm.stvnl12[nnr] = olm[0] * dtau.y; + lm.stvnl13[nnr] = olm[0] * dtau.z; + lm.stvnl22[nnr] = olm[1] * dtau.y; + lm.stvnl23[nnr] = olm[1] * dtau.z; + lm.stvnl33[nnr] = olm[2] * dtau.z; + } + } + else if (nspin == 4)// condition 9 + { + const int is = (jj-jj0*npol) + (kk-kk0*npol)*2; + // condition 10, details of nspin 4 + if (is == 0) // is==3 is not needed in force calculation + { + lm.DHloc_fixedR_x[nnr] = olm[0]; + lm.DHloc_fixedR_y[nnr] = olm[1]; + lm.DHloc_fixedR_z[nnr] = olm[2]; + if(cal_stress) + { + lm.stvnl11[nnr] = olm[0] * dtau.x; + lm.stvnl12[nnr] = olm[0] * dtau.y; + lm.stvnl13[nnr] = olm[0] * dtau.z; + lm.stvnl22[nnr] = olm[1] * dtau.y; + lm.stvnl23[nnr] = olm[1] * dtau.z; + lm.stvnl33[nnr] = olm[2] * dtau.z; + } + } + else if (is == 1 || is == 2 || is == 3) + { + lm.DHloc_fixedR_x[nnr] = 0.0; + lm.DHloc_fixedR_y[nnr] = 0.0; + lm.DHloc_fixedR_z[nnr] = 0.0; + if(cal_stress) + { + lm.stvnl11[nnr] = 0.0; + lm.stvnl12[nnr] = 0.0; + lm.stvnl13[nnr] = 0.0; + lm.stvnl22[nnr] = 0.0; + lm.stvnl23[nnr] = 0.0; + lm.stvnl33[nnr] = 0.0; + } + } + else + { + ModuleBase::WARNING_QUIT("LCAO_domain::build_ST_new","is must be 0, 1, 2, 3"); + }// end condition 10, details of spin 4 + } + else + { + ModuleBase::WARNING_QUIT("LCAO_domain::build_ST_new","nspin must be 1, 2 or 4"); + }// end condition 9, nspin + }// end condition 8, S or T + ++total_nnr; + ++nnr; + }// end condition 7, gamma or multiple k +} + + +void single_overlap( + LCAO_Matrix& lm, + const LCAO_Orbitals& orb, + const ORB_gen_tables& uot, + const Parallel_Orbitals& pv, + const UnitCell& ucell, + const int nspin, + const bool cal_stress, + const int iw1_all, + const int iw2_all, + const int m1, + const int m2, + const char &dtype, + const int T1, + const int L1, + const int N1, + const int T2, + const int L2, + const int N2, + const ModuleBase::Vector3 &dtau, + const ModuleBase::Vector3 &tau1, + const ModuleBase::Vector3 &tau2, + const int npol, + const int jj, + const int jj0, + const int kk, + const int kk0, + int& nnr, // output value + int& total_nnr, // output value + double *olm, // output value + double *HSloc // output value +) +{ + const bool gamma_only_local = GlobalV::GAMMA_ONLY_LOCAL; + +#ifdef USE_NEW_TWO_CENTER + //================================================================= + // new two-center integral (temporary) + //================================================================= + // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l) + const int M1 = (m1 % 2 == 0) ? -m1/2 : (m1+1)/2; + const int M2 = (m2 % 2 == 0) ? -m2/2 : (m2+1)/2; + + switch (dtype) + { + case 'S': + uot.two_center_bundle->overlap_orb->calculate(T1, L1, N1, M1, + T2, L2, N2, M2, dtau * ucell.lat0, olm); + break; + case 'T': + uot.two_center_bundle->kinetic_orb->calculate(T1, L1, N1, M1, + T2, L2, N2, M2, dtau * ucell.lat0, olm); + break; + default: // not supposed to happen + ModuleBase::WARNING_QUIT("LCAO_domain::build_ST_new","dtype must be S or T"); + } +#else + uot.snap_psipsi( orb, olm, 0, dtype, + tau1, T1, L1, m1, N1, // info of atom1 + adjs.adjacent_tau[ad], T2, L2, m2, N2, // info of atom2 + cal_syns, + dmax); +#endif + + //================================================================= + // end of new two-center integral (temporary) + //================================================================= + + // When NSPIN == 4 , only diagonal term is calculated for T or S Operators + // use olm1 to store the diagonal term with complex data type. + std::complex olm1[4]; + + if(nspin == 4) + { + olm1[0] = std::complex(olm[0], 0.0); + olm1[1] = ModuleBase::ZERO; + olm1[2] = ModuleBase::ZERO; + olm1[3] = std::complex(olm[0], 0.0); + } + + + // condition 7, gamma only or multiple k-points + if(gamma_only_local) + { + // mohan add 2010-06-29 + // set the value in Hloc and Sloc + // according to global2local_row and global2local_col + // the last paramete: 1 for Sloc, 2 for Hloc + // and 3 for Hloc_fixed. + lm.set_HSgamma(iw1_all, iw2_all, olm[0], HSloc); + } + else // condition 7, multiple k-points algorithm + { + // condition 8, S or T + if(dtype=='S') + { + // condition 9, nspin + if (nspin == 1 || nspin ==2) + { + HSloc[nnr] = olm[0]; + } + else if (nspin == 4) + {//only has diagonal term here. + const int is = (jj-jj0*npol) + (kk-kk0*npol)*2; + lm.SlocR_soc[nnr] = olm1[is]; + } + else + { + ModuleBase::WARNING_QUIT("LCAO_domain::build_ST_new","nspin must be 1, 2 or 4"); + } + } + else if(dtype=='T') // condition 8, S or T + { + // condition 9, nspin + if(nspin == 1 || nspin ==2) + { + HSloc[nnr] = olm[0];// + } + else if (nspin == 4) + {//only has diagonal term here. + const int is = (jj-jj0*npol) + (kk-kk0*npol)*2; + lm.Hloc_fixedR_soc[nnr] = olm1[is]; + } + else + { + ModuleBase::WARNING_QUIT("LCAO_domain::build_ST_new","nspin must be 1, 2 or 4"); + } + }// end condition 8, S or T + ++total_nnr; + ++nnr; + }// end condition 7, gamma point or multiple k-points +} + + +void build_ST_new( + LCAO_Matrix& lm, + const char& dtype, + const bool& calc_deri, + const UnitCell &ucell, + const LCAO_Orbitals& orb, + const Parallel_Orbitals& pv, + const ORB_gen_tables& uot, + Grid_Driver* GridD, + double* HSloc, + bool cal_syns, + double dmax) +{ + ModuleBase::TITLE("LCAO_domain","build_ST_new"); + ModuleBase::timer::tick("LCAO_domain","build_ST_new"); + + const int nspin = GlobalV::NSPIN; + const int npol = GlobalV::NPOL; + const bool cal_stress = GlobalV::CAL_STRESS; + const bool gamma_only_local = GlobalV::GAMMA_ONLY_LOCAL; + + int total_nnr = 0; +#ifdef _OPENMP +#pragma omp parallel reduction(+:total_nnr) +{ +#endif + //array to store data + double olm[3]={0.0,0.0,0.0}; + + //\sum{T} e**{ikT} <\phi_{ia}|d\phi_{k\beta}(T)> + ModuleBase::Vector3 tau1, tau2, dtau; + ModuleBase::Vector3 dtau1, dtau2, tau0; + +#ifdef _OPENMP +// use schedule(dynamic) for load balancing because adj_num is various +#pragma omp for schedule(dynamic) +#endif + for (int iat1 = 0; iat1 < ucell.nat; iat1++) // loop 1, iat1 + { + const int T1 = ucell.iat2it[iat1]; + const Atom* atom1 = &ucell.atoms[T1]; + const int I1 = ucell.iat2ia[iat1]; + + tau1 = atom1->tau[I1]; + + //GridD->Find_atom(tau1); + AdjacentAtomInfo adjs; + GridD->Find_atom(ucell, tau1, T1, I1, &adjs); + // Record_adj.for_2d() may not called in some case + int nnr = pv.nlocstart ? pv.nlocstart[iat1] : 0; + + if (cal_syns) + { + for (int k = 0; k < 3; k++) + { + tau1[k] = tau1[k] - atom1->vel[I1][k] * INPUT.mdp.md_dt / ucell.lat0 ; + } + } + + // loop 2, ad + for (int ad = 0; ad < adjs.adj_num+1; ++ad) + { + const int T2 = adjs.ntype[ad]; + const int I2 = adjs.natom[ad]; + Atom* atom2 = &ucell.atoms[T2]; + tau2 = adjs.adjacent_tau[ad]; + dtau = tau2 - tau1; + double distance = dtau.norm() * ucell.lat0; + double rcut = orb.Phi[T1].getRcut() + orb.Phi[T2].getRcut(); + + // condition 3, distance + if(distance < rcut) + { + int iw1_all = ucell.itiaiw2iwt( T1, I1, 0) ; //iw1_all = combined index (it, ia, iw) + + // loop 4, jj + for(int jj=0; jjnw*npol; ++jj) + { + const int jj0 = jj/npol; + const int L1 = atom1->iw2l[jj0]; + const int N1 = atom1->iw2n[jj0]; + const int m1 = atom1->iw2m[jj0]; + + int iw2_all = ucell.itiaiw2iwt( T2, I2, 0);//zhengdy-soc + + // loop 5, kk + for(int kk=0; kknw*npol; ++kk) + { + const int kk0 = kk/npol; + const int L2 = atom2->iw2l[kk0]; + const int N2 = atom2->iw2n[kk0]; + const int m2 = atom2->iw2m[kk0]; + + // mohan add 2010-06-29 + // this is in fact the same as in build_Nonlocal_mu, + // the difference is that here we use {L,N,m} for ccycle, + // build_Nonlocal_mu use atom.nw for cycle. + // so, here we use ParaO::in_this_processor, + // in build_Non... use global2local_row + // and global2local_col directly, + if (!pv.in_this_processor(iw1_all, iw2_all)) + { + ++iw2_all; + continue; + } + + olm[0] = 0.0; + olm[1] = 0.0; + olm[2] = 0.0; + + // condition 6, not calculate the derivative + if(!calc_deri) + { + single_overlap( + lm, + orb, + uot, + pv, + ucell, + nspin, + cal_stress, + iw1_all, + iw2_all, + m1, + m2, + dtype, + T1, + L1, + N1, + T2, + L2, + N2, + dtau, + tau1, + tau2, + npol, + jj, + jj0, + kk, + kk0, + nnr, + total_nnr, + olm, + HSloc); + } + else // condition 6, calculate the derivative + { + single_derivative( + lm, + orb, + uot, + pv, + ucell, + nspin, + cal_stress, + iw1_all, + iw2_all, + m1, + m2, + dtype, + T1, + L1, + N1, + T2, + L2, + N2, + dtau, + tau1, + tau2, + npol, + jj, + jj0, + kk, + kk0, + nnr, + total_nnr, + olm); + }// end condition 6, calc_deri + ++iw2_all; + }// end loop 5, kk + ++iw1_all; + }// end loop 4, jj + }// condition 3, distance + else if(distance>=rcut && (!gamma_only_local)) + { + int start1 = ucell.itiaiw2iwt( T1, I1, 0); + int start2 = ucell.itiaiw2iwt( T2, I2, 0); + + bool is_adj = false; + for (int ad0=0; ad0 < adjs.adj_num+1; ++ad0) + { + const int T0 = adjs.ntype[ad0]; + tau0 = adjs.adjacent_tau[ad0]; + dtau1 = tau0 - tau1; + double distance1 = dtau1.norm() * ucell.lat0; + double rcut1 = orb.Phi[T1].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max(); + dtau2 = tau0 - tau2; + double distance2 = dtau2.norm() * ucell.lat0; + double rcut2 = orb.Phi[T2].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max(); + if( distance1 < rcut1 && distance2 < rcut2 ) + { + is_adj = true; + break; + } + }//ad0 + + if( is_adj ) + { + for(int jj=0; jjnw * npol; ++jj) + { + const int mu = pv.global2local_row(start1 + jj); + if(mu<0)continue; + for(int kk=0; kknw * npol; ++kk) + { + const int nu = pv.global2local_col(start2 + kk); + if(nu<0)continue; + ++total_nnr; + ++nnr; + }//kk + }//jj + } // is_adj + }//distance, end condition 3 + }// end loop 2, ad + }// end loop 1, iat1 + + +#ifdef _OPENMP +} +#endif + + + if(!gamma_only_local) + { + if(total_nnr != pv.nnr) + { + std::cout << " nnr=" << total_nnr << " LNNR.nnr=" << pv.nnr << std::endl; + GlobalV::ofs_running << " nnr=" << total_nnr << " LNNR.nnr=" << pv.nnr << std::endl; + ModuleBase::WARNING_QUIT("LCAO_domain::build_ST_new","nnr != LNNR.nnr"); + } + } + + ModuleBase::timer::tick("LCAO_domain","build_ST_new"); + return; +} + +} diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/fedm_gamma.cpp similarity index 62% rename from source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp rename to source/module_hamilt_lcao/hamilt_lcaodft/fedm_gamma.cpp index 62a620ae94..c2918bc00e 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_edm.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/fedm_gamma.cpp @@ -10,9 +10,11 @@ // force due to the overlap matrix. // need energy density matrix here. template<> -void Force_LCAO::cal_foverlap( +void Force_LCAO::cal_fedm( const bool isforce, const bool isstress, + const UnitCell& ucell, + const elecstate::DensityMatrix* dm, const psi::Psi* psi, const Parallel_Orbitals& pv, const elecstate::ElecState *pelec, @@ -20,50 +22,50 @@ void Force_LCAO::cal_foverlap( ModuleBase::matrix &foverlap, ModuleBase::matrix& soverlap, const K_Vectors* kv, - Record_adj* ra, - const elecstate::DensityMatrix* DM) + Record_adj* ra) { - ModuleBase::TITLE("Force_LCAO_gamma","cal_foverlap"); - ModuleBase::timer::tick("Force_LCAO_gamma","cal_foverlap"); + ModuleBase::TITLE("Force_LCAO","cal_fedm"); + ModuleBase::timer::tick("Force_LCAO","cal_fedm"); - ModuleBase::timer::tick("Force_LCAO_gamma","cal_edm_2d"); + const int nspin = GlobalV::NSPIN; + const int nbands = GlobalV::NBANDS; + const int nlocal = GlobalV::NLOCAL; - ModuleBase::matrix wgEkb; - wgEkb.create(GlobalV::NSPIN, GlobalV::NBANDS); + ModuleBase::matrix wg_ekb; + wg_ekb.create(nspin, nbands); - for(int is=0; iswg(is,ib) * pelec->ekb(is, ib); + wg_ekb(is,ib) = pelec->wg(is,ib) * pelec->ekb(is, ib); } } // construct a DensityMatrix for Gamma-Only - elecstate::DensityMatrix EDM(&pv, GlobalV::NSPIN); + elecstate::DensityMatrix edm(&pv, nspin); #ifdef __PEXSI if (GlobalV::KS_SOLVER == "pexsi") { auto pes = dynamic_cast*>(pelec); - for (int ik = 0; ik < GlobalV::NSPIN; ik++) + for (int ik = 0; ik < nspin; ik++) { - EDM.set_DMK_pointer(ik, pes->get_DM()->pexsi_EDM[ik]); + edm.set_DMK_pointer(ik, pes->get_DM()->pexsi_edm[ik]); } } else #endif { - elecstate::cal_dm_psi(EDM.get_paraV_pointer(), wgEkb, psi[0], EDM); + elecstate::cal_dm_psi(edm.get_paraV_pointer(), wg_ekb, psi[0], edm); } - ModuleBase::timer::tick("Force_LCAO_gamma","cal_edm_2d"); - for(int i=0; i::cal_foverlap( { const int index = mu * pv.ncol + nu; double sum = 0.0; - for(int is=0; is::cal_foverlap( if(isstress) { - StressTools::stress_fill(GlobalC::ucell.lat0, GlobalC::ucell.omega, soverlap); + StressTools::stress_fill(ucell.lat0, ucell.omega, soverlap); } - ModuleBase::timer::tick("Force_LCAO_gamma","cal_foverlap"); + ModuleBase::timer::tick("Force_LCAO","cal_fedm"); return; } diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/foverlap_k.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/fedm_k.cpp similarity index 75% rename from source/module_hamilt_lcao/hamilt_lcaodft/foverlap_k.cpp rename to source/module_hamilt_lcao/hamilt_lcaodft/fedm_k.cpp index d82d8735f0..e70cb7973d 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/foverlap_k.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/fedm_k.cpp @@ -25,8 +25,11 @@ #include "module_hamilt_lcao/hamilt_lcaodft/record_adj.h" template<> -void Force_LCAO>::cal_foverlap(const bool isforce, +void Force_LCAO>::cal_fedm( + const bool isforce, const bool isstress, + const UnitCell& ucell, + const elecstate::DensityMatrix, double>* dm, const psi::Psi>* psi, const Parallel_Orbitals& pv, const elecstate::ElecState* pelec, @@ -34,60 +37,58 @@ void Force_LCAO>::cal_foverlap(const bool isforce, ModuleBase::matrix& foverlap, ModuleBase::matrix& soverlap, const K_Vectors* kv, - Record_adj* ra, - const elecstate::DensityMatrix, double>* DM) + Record_adj* ra) { - ModuleBase::TITLE("Force_LCAO_k", "cal_foverlap_k"); - ModuleBase::timer::tick("Force_LCAO_k", "cal_foverlap_k"); + ModuleBase::TITLE("Force_LCAO","cal_fedm"); + ModuleBase::timer::tick("Force_LCAO","cal_fedm"); + + const int nspin = GlobalV::NSPIN; + const int nbands = GlobalV::NBANDS; // construct a DensityMatrix object - elecstate::DensityMatrix, double> EDM(kv, &pv, GlobalV::NSPIN); + elecstate::DensityMatrix, double> edm(kv, &pv, nspin); //-------------------------------------------- // calculate the energy density matrix here. //-------------------------------------------- - ModuleBase::timer::tick("Force_LCAO_k", "cal_edm_2d"); - ModuleBase::matrix wgEkb; - wgEkb.create(kv->get_nks(), GlobalV::NBANDS); - ModuleBase::Memory::record("Force::wgEkb", sizeof(double) * kv->get_nks() * GlobalV::NBANDS); + ModuleBase::matrix wg_ekb; + wg_ekb.create(kv->get_nks(), nbands); + ModuleBase::Memory::record("Force::wg_ekb", sizeof(double) * kv->get_nks() * nbands); #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static, 1024) #endif for (int ik = 0; ik < kv->get_nks(); ik++) { - for (int ib = 0; ib < GlobalV::NBANDS; ib++) + for (int ib = 0; ib < nbands; ib++) { - wgEkb(ik, ib) = pelec->wg(ik, ib) * pelec->ekb(ik, ib); + wg_ekb(ik, ib) = pelec->wg(ik, ib) * pelec->ekb(ik, ib); } } std::vector edm_k(kv->get_nks()); // use the original formula (Hamiltonian matrix) to calculate energy density matrix - if (DM->EDMK.size()) + if (dm->EDMK.size()) { #ifdef _OPENMP -#pragma omp parallel for schedule(static, 1024) +#pragma omp parallel for schedule(static) #endif for (int ik = 0; ik < kv->get_nks(); ++ik) { - //edm_k[ik] = loc.edm_k_tddft[ik]; - EDM.set_DMK_pointer(ik,DM->EDMK[ik].c); + edm.set_DMK_pointer(ik,dm->EDMK[ik].c); } } else { - //elecstate::cal_dm(pv, wgEkb, psi[0], edm_k); // cal_dm_psi - elecstate::cal_dm_psi(EDM.get_paraV_pointer(), wgEkb, psi[0], EDM); + elecstate::cal_dm_psi(edm.get_paraV_pointer(), wg_ekb, psi[0], edm); } - //loc.cal_dm_R(edm_k, ra, edm2d, kv); // cal_dm_2d - EDM.init_DMR(*ra, &GlobalC::ucell); - EDM.cal_DMR(); - EDM.sum_DMR_spin(); + edm.init_DMR(*ra, &ucell); + edm.cal_DMR(); + edm.sum_DMR_spin(); // ModuleBase::timer::tick("Force_LCAO_k", "cal_edm_2d"); //-------------------------------------------- @@ -111,13 +112,13 @@ void Force_LCAO>::cal_foverlap(const bool isforce, #ifdef _OPENMP #pragma omp for schedule(dynamic) #endif - for (int iat = 0; iat < GlobalC::ucell.nat; iat++) + for (int iat = 0; iat < ucell.nat; iat++) { - const int T1 = GlobalC::ucell.iat2it[iat]; - Atom* atom1 = &GlobalC::ucell.atoms[T1]; - const int I1 = GlobalC::ucell.iat2ia[iat]; + const int T1 = ucell.iat2it[iat]; + Atom* atom1 = &ucell.atoms[T1]; + const int I1 = ucell.iat2ia[iat]; // get iat1 - int iat1 = GlobalC::ucell.itia2iat(T1, I1); + int iat1 = ucell.itia2iat(T1, I1); double* foverlap_iat; if (isforce) { @@ -133,23 +134,23 @@ void Force_LCAO>::cal_foverlap(const bool isforce, } #endif int irr = pv.nlocstart[iat]; - const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0); + const int start1 = ucell.itiaiw2iwt(T1, I1, 0); for (int cb = 0; cb < ra->na_each[iat]; ++cb) { const int T2 = ra->info[iat][cb][3]; const int I2 = ra->info[iat][cb][4]; - const int start2 = GlobalC::ucell.itiaiw2iwt(T2, I2, 0); + const int start2 = ucell.itiaiw2iwt(T2, I2, 0); - Atom* atom2 = &GlobalC::ucell.atoms[T2]; + Atom* atom2 = &ucell.atoms[T2]; // get iat2 - int iat2 = GlobalC::ucell.itia2iat(T2, I2); + int iat2 = ucell.itia2iat(T2, I2); double Rx = ra->info[iat][cb][0]; double Ry = ra->info[iat][cb][1]; double Rz = ra->info[iat][cb][2]; // get BaseMatrix - hamilt::BaseMatrix* tmp_matrix = EDM.get_DMR_pointer(1)->find_matrix(iat1, iat2, Rx, Ry, Rz); + hamilt::BaseMatrix* tmp_matrix = edm.get_DMR_pointer(1)->find_matrix(iat1, iat2, Rx, Ry, Rz); if(tmp_matrix == nullptr) { continue; @@ -161,7 +162,7 @@ void Force_LCAO>::cal_foverlap(const bool isforce, { for (int nu = 0; nu < pv.get_col_size(iat2); ++nu) { - // here do not sum over spin due to EDM.sum_DMR_spin(); + // here do not sum over spin due to edm.sum_DMR_spin(); double edm2d1 = tmp_matrix->get_value(mu,nu); double edm2d2 = 2.0 * edm2d1; @@ -233,16 +234,16 @@ void Force_LCAO>::cal_foverlap(const bool isforce, if (isstress) { - StressTools::stress_fill(GlobalC::ucell.lat0, GlobalC::ucell.omega, soverlap); + StressTools::stress_fill(ucell.lat0, ucell.omega, soverlap); } if (total_irr != pv.nnr) { ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "wrong irr", total_irr); - ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "wrong LNNR.nnr", pv.nnr); - ModuleBase::WARNING_QUIT("Force_LCAO_k::cal_foverlap_k", "irr!=LNNR.nnr"); + ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "wrong pv.nnr", pv.nnr); + ModuleBase::WARNING_QUIT("Force_LCAO::fedm_k", "irr!=pv.nnr"); } - ModuleBase::timer::tick("Force_LCAO_k", "cal_foverlap_k"); + ModuleBase::timer::tick("Force_LCAO","cal_fedm"); return; } diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/ftvnl_dphi_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/ftvnl_dphi_gamma.cpp new file mode 100644 index 0000000000..55e3bedf96 --- /dev/null +++ b/source/module_hamilt_lcao/hamilt_lcaodft/ftvnl_dphi_gamma.cpp @@ -0,0 +1,72 @@ +#include "FORCE.h" +#include "module_hamilt_pw/hamilt_pwdft/global.h" +#include "module_cell/module_neighbor/sltk_grid_driver.h" +#include +#include "module_base/timer.h" + +template<> +void Force_LCAO::cal_ftvnl_dphi( + const elecstate::DensityMatrix* dm, + const Parallel_Orbitals& pv, + const UnitCell& ucell, + LCAO_Matrix& lm, + const bool isforce, + const bool isstress, + ModuleBase::matrix& ftvnl_dphi, + ModuleBase::matrix& stvnl_dphi, + Record_adj* ra) +{ + ModuleBase::TITLE("Force_LCAO","cal_ftvnl_dphi"); + ModuleBase::timer::tick("Force_LCAO","cal_ftvnl_dphi"); + + const int nlocal = GlobalV::NLOCAL; + const int nspin = GlobalV::NSPIN; + + for(int i=0; i= 0 && nu >= 0 ) + { + const int index = mu * pv.ncol + nu; + //contribution from deriv of AO's in T+VNL term + + double sum = 0.0; + for(int is=0; isget_DMK(is+1, 0, nu, mu); + } + sum *= 2.0; + + if(isforce) + { + ftvnl_dphi(iat,0) += sum * lm.DHloc_fixed_x[index]; + ftvnl_dphi(iat,1) += sum * lm.DHloc_fixed_y[index]; + ftvnl_dphi(iat,2) += sum * lm.DHloc_fixed_z[index]; + } + if(isstress) + { + stvnl_dphi(0,0) += sum/2.0 * lm.DHloc_fixed_11[index]; + stvnl_dphi(0,1) += sum/2.0 * lm.DHloc_fixed_12[index]; + stvnl_dphi(0,2) += sum/2.0 * lm.DHloc_fixed_13[index]; + stvnl_dphi(1,1) += sum/2.0 * lm.DHloc_fixed_22[index]; + stvnl_dphi(1,2) += sum/2.0 * lm.DHloc_fixed_23[index]; + stvnl_dphi(2,2) += sum/2.0 * lm.DHloc_fixed_33[index]; + } + } + } + } + + if(isstress) + { + StressTools::stress_fill(ucell.lat0, ucell.omega, stvnl_dphi); + } + + ModuleBase::timer::tick("Force_LCAO","cal_ftvnl_dphi"); + return; +} diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/ftvnl_dphi_k.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/ftvnl_dphi_k.cpp index e0d37f0535..5734bba456 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/ftvnl_dphi_k.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/ftvnl_dphi_k.cpp @@ -24,7 +24,8 @@ template<> -void Force_LCAO>::cal_ftvnl_dphi(const elecstate::DensityMatrix, double>* DM, +void Force_LCAO>::cal_ftvnl_dphi( + const elecstate::DensityMatrix, double>* dm, const Parallel_Orbitals& pv, const UnitCell& ucell, LCAO_Matrix& lm, @@ -34,15 +35,14 @@ void Force_LCAO>::cal_ftvnl_dphi(const elecstate::DensityMa ModuleBase::matrix& stvnl_dphi, Record_adj* ra) { - ModuleBase::TITLE("Force_LCAO_k", "cal_ftvnl_dphi_k"); - ModuleBase::timer::tick("Force_LCAO_k", "cal_ftvnl_dphi_k"); + ModuleBase::TITLE("Force_LCAO", "cal_ftvnl_dphi"); + ModuleBase::timer::tick("Force_LCAO", "cal_ftvnl_dphi"); const int nspin_DMR = (GlobalV::NSPIN == 2) ? 2 : 1; int total_irr = 0; // get the adjacent atom's information. - // GlobalV::ofs_running << " calculate the ftvnl_dphi_k force" << std::endl; #ifdef _OPENMP #pragma omp parallel { @@ -96,14 +96,14 @@ void Force_LCAO>::cal_ftvnl_dphi(const elecstate::DensityMa std::vector*> tmp_matrix; for (int is = 0; is < nspin_DMR; ++is) { - tmp_matrix.push_back(DM->get_DMR_pointer(is+1)->find_matrix(iat1, iat2, Rx, Ry, Rz)); + tmp_matrix.push_back(dm->get_DMR_pointer(is+1)->find_matrix(iat1, iat2, Rx, Ry, Rz)); } - //hamilt::BaseMatrix* tmp_matrix = DM->get_DMR_pointer(1)->find_matrix(iat1, iat2, Rx, Ry, Rz); + for (int mu = 0; mu < pv.get_row_size(iat1); ++mu) { for (int nu = 0; nu < pv.get_col_size(iat2); ++nu) { - // get value from DM + // get value from dm double dm2d1 = 0.0; for (int is = 0; is < nspin_DMR; ++is) { @@ -164,6 +164,6 @@ void Force_LCAO>::cal_ftvnl_dphi(const elecstate::DensityMa StressTools::stress_fill(ucell.lat0, ucell.omega, stvnl_dphi); } - ModuleBase::timer::tick("Force_LCAO_k", "cal_ftvnl_dphi_k"); + ModuleBase::timer::tick("Force_LCAO", "cal_ftvnl_dphi"); return; } diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_vl.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/fvl_dphi_gamma.cpp similarity index 61% rename from source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_vl.cpp rename to source/module_hamilt_lcao/hamilt_lcaodft/fvl_dphi_gamma.cpp index 6475228443..75d06ce071 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_vl.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/fvl_dphi_gamma.cpp @@ -11,23 +11,25 @@ void Force_LCAO::cal_fvl_dphi( ModuleBase::matrix& fvl_dphi, ModuleBase::matrix& svl_dphi) { - ModuleBase::TITLE("Force_LCAO_gamma","cal_fvl_dphi"); - ModuleBase::timer::tick("Force_LCAO_gamma","cal_fvl_dphi"); - int istep = 1; + ModuleBase::TITLE("Force_LCAO","cal_fvl_dphi"); + ModuleBase::timer::tick("Force_LCAO","cal_fvl_dphi"); + + const int nspin = GlobalV::NSPIN; + fvl_dphi.zero_out(); svl_dphi.zero_out(); - for(int is=0; isget_effective_v(is); const double* vofk_eff1 = nullptr; + if(XC_Functional::get_func_type()==3 || XC_Functional::get_func_type()==5) { vofk_eff1 = pot_in->get_effective_vofk(is); - } - if(XC_Functional::get_func_type()==3 || XC_Functional::get_func_type()==5) - { - Gint_inout inout(is, + Gint_inout inout( + is, vr_eff1, vofk_eff1, isforce, @@ -36,14 +38,21 @@ void Force_LCAO::cal_fvl_dphi( &svl_dphi, Gint_Tools::job_type::force_meta); - gint.cal_gint(&inout); + gint.cal_gint(&inout); } else { - Gint_inout inout(is, vr_eff1, isforce, isstress, &fvl_dphi, &svl_dphi, Gint_Tools::job_type::force); - gint.cal_gint(&inout); + Gint_inout inout( + is, + vr_eff1, + isforce, + isstress, + &fvl_dphi, + &svl_dphi, + Gint_Tools::job_type::force); + + gint.cal_gint(&inout); } - } if(isstress) @@ -52,10 +61,14 @@ void Force_LCAO::cal_fvl_dphi( { for(int j=0;j<3;j++) { - if(i template<> -void Force_LCAO>::cal_fvl_dphi(const bool isforce, +void Force_LCAO>::cal_fvl_dphi( + const bool isforce, const bool isstress, const elecstate::Potential* pot_in, TGint>::type& gint, ModuleBase::matrix& fvl_dphi, ModuleBase::matrix& svl_dphi) { - ModuleBase::TITLE("Force_LCAO_k", "cal_fvl_dphi_k"); - ModuleBase::timer::tick("Force_LCAO_k", "cal_fvl_dphi_k"); - + ModuleBase::TITLE("Force_LCAO", "cal_fvl_dphi"); if (!isforce && !isstress) { - ModuleBase::timer::tick("Force_LCAO_k", "cal_fvl_dphi_k"); return; } - int istep = 1; + ModuleBase::timer::tick("Force_LCAO", "cal_fvl_dphi"); + + const int nspin = GlobalV::NSPIN; - for (int is = 0; is < GlobalV::NSPIN; ++is) + for (int is = 0; is < nspin; ++is) { const double* vr_eff1 = pot_in->get_effective_v(is); const double* vofk_eff1 = nullptr; + if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5) { vofk_eff1 = pot_in->get_effective_vofk(is); @@ -58,6 +59,7 @@ void Force_LCAO>::cal_fvl_dphi(const bool isforce, // fvl_dphi can not be set to zero here if Vna is used if (isstress || isforce) { + // meta GGA functionals if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5) { Gint_inout inout(is, @@ -68,11 +70,19 @@ void Force_LCAO>::cal_fvl_dphi(const bool isforce, &fvl_dphi, &svl_dphi, Gint_Tools::job_type::force_meta); + gint.cal_gint(&inout); } else { - Gint_inout inout(is, vr_eff1, isforce, isstress, &fvl_dphi, &svl_dphi, Gint_Tools::job_type::force); + Gint_inout inout(is, + vr_eff1, + isforce, + isstress, + &fvl_dphi, + &svl_dphi, + Gint_Tools::job_type::force); + gint.cal_gint(&inout); } } @@ -83,6 +93,6 @@ void Force_LCAO>::cal_fvl_dphi(const bool isforce, StressTools::stress_fill(-1.0, GlobalC::ucell.omega, svl_dphi); } - ModuleBase::timer::tick("Force_LCAO_k", "cal_fvl_dphi_k"); + ModuleBase::timer::tick("Force_LCAO", "cal_fvl_dphi"); return; } diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_tvnl.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/fvnl_dbeta_gamma.cpp similarity index 76% rename from source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_tvnl.cpp rename to source/module_hamilt_lcao/hamilt_lcaodft/fvnl_dbeta_gamma.cpp index a4f9fc240f..646074d2c2 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma_tvnl.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/fvnl_dbeta_gamma.cpp @@ -6,7 +6,7 @@ template<> void Force_LCAO::cal_fvnl_dbeta( - const elecstate::DensityMatrix* DM, + const elecstate::DensityMatrix* dm, const Parallel_Orbitals& pv, const UnitCell& ucell, const LCAO_Orbitals& orb, @@ -17,8 +17,8 @@ void Force_LCAO::cal_fvnl_dbeta( ModuleBase::matrix& fvnl_dbeta, ModuleBase::matrix& svnl_dbeta) { - ModuleBase::TITLE("Force_LCAO_gamma","cal_fvnl_dbeta_new"); - ModuleBase::timer::tick("Force_LCAO_gamma","cal_fvnl_dbeta_new"); + ModuleBase::TITLE("Force_LCAO","cal_fvnl_dbeta"); + ModuleBase::timer::tick("Force_LCAO","cal_fvnl_dbeta"); double r0[3]; double r1[3]; @@ -39,7 +39,7 @@ void Force_LCAO::cal_fvnl_dbeta( std::vector>>> nlm_tot; nlm_tot.resize(gd.getAdjacentNum() + 1); //this saves and -//Step 1 : Calculate and save and + //Step 1 : Calculate and save and for (int ad1 = 0; ad1 < gd.getAdjacentNum() + 1; ad1++) { const int T1 = gd.getType(ad1); @@ -62,7 +62,11 @@ void Force_LCAO::cal_fvnl_dbeta( const int iw1_all = start1 + iw1; const int iw1_local = pv.global2local_row(iw1_all); const int iw2_local = pv.global2local_col(iw1_all); - if(iw1_local < 0 && iw2_local < 0) continue; + + if(iw1_local < 0 && iw2_local < 0) + { + continue; + } std::vector> nlm; @@ -97,7 +101,7 @@ void Force_LCAO::cal_fvnl_dbeta( } }//ad -//Step 2 : sum to get beta + //Step 2 : sum to get beta for (int ad1 = 0; ad1 < gd.getAdjacentNum() + 1; ++ad1) { const int T1 = gd.getType(ad1); @@ -138,12 +142,20 @@ void Force_LCAO::cal_fvnl_dbeta( { const int iw1_all = start1 + iw1; const int iw1_local = pv.global2local_row(iw1_all); - if(iw1_local < 0)continue; + if(iw1_local < 0) + { + continue; + } for (int iw2 = 0; iw2 < ucell.atoms[T2].nw; ++iw2) + { const int iw2_all = start2 + iw2; const int iw2_local = pv.global2local_col(iw2_all); - if(iw2_local < 0)continue; + + if(iw2_local < 0) + { + continue; + } double nlm[3] = {0,0,0}; double nlm_t[3] = {0,0,0}; //transpose @@ -206,9 +218,8 @@ void Force_LCAO::cal_fvnl_dbeta( double sum = 0.0; for(int is=0; isget_DMK(is+1, 0, iw2_local, iw1_local); + sum += dm->get_DMK(is+1, 0, iw2_local, iw1_local); } sum *= 2.0; @@ -234,71 +245,12 @@ void Force_LCAO::cal_fvnl_dbeta( }//ad2 }//ad1 }//iat + if(isstress) { StressTools::stress_fill(ucell.lat0, ucell.omega, svnl_dbeta); } - ModuleBase::timer::tick("Force_LCAO_gamma","cal_fvnl_dbeta_new"); -} - -template<> -void Force_LCAO::cal_ftvnl_dphi( - const elecstate::DensityMatrix* DM, - const Parallel_Orbitals& pv, - const UnitCell& ucell, - LCAO_Matrix& lm, - const bool isforce, - const bool isstress, - ModuleBase::matrix& ftvnl_dphi, - ModuleBase::matrix& stvnl_dphi, - Record_adj* ra) -{ - ModuleBase::TITLE("Force_LCAO_gamma","cal_ftvnl_dphi"); - ModuleBase::timer::tick("Force_LCAO_gamma","cal_ftvnl_dphi"); - for(int i=0; i= 0 && nu >= 0 ) - { - const int index = mu * pv.ncol + nu; - //contribution from deriv of AO's in T+VNL term - - double sum = 0.0; - for(int is=0; isget_DMK(is+1, 0, nu, mu); - } - sum *= 2.0; - - if(isforce) - { - ftvnl_dphi(iat,0) += sum * lm.DHloc_fixed_x[index]; - ftvnl_dphi(iat,1) += sum * lm.DHloc_fixed_y[index]; - ftvnl_dphi(iat,2) += sum * lm.DHloc_fixed_z[index]; - } - if(isstress) - { - stvnl_dphi(0,0) += sum/2.0 * lm.DHloc_fixed_11[index]; - stvnl_dphi(0,1) += sum/2.0 * lm.DHloc_fixed_12[index]; - stvnl_dphi(0,2) += sum/2.0 * lm.DHloc_fixed_13[index]; - stvnl_dphi(1,1) += sum/2.0 * lm.DHloc_fixed_22[index]; - stvnl_dphi(1,2) += sum/2.0 * lm.DHloc_fixed_23[index]; - stvnl_dphi(2,2) += sum/2.0 * lm.DHloc_fixed_33[index]; - } - } - } - } - if(isstress) - { - StressTools::stress_fill(ucell.lat0, ucell.omega, stvnl_dphi); - } - ModuleBase::timer::tick("Force_LCAO_gamma","cal_ftvnl_dphi"); - return; + ModuleBase::timer::tick("Force_LCAO","cal_fvnl_dbeta"); } + diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/fvnl_dbeta_k.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/fvnl_dbeta_k.cpp index f3b74d4237..a7f5f1e37c 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/fvnl_dbeta_k.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/fvnl_dbeta_k.cpp @@ -27,7 +27,8 @@ typedef std::tuple key_tuple; // must consider three-center H matrix. template<> -void Force_LCAO>::cal_fvnl_dbeta(const elecstate::DensityMatrix, double>* DM, +void Force_LCAO>::cal_fvnl_dbeta( + const elecstate::DensityMatrix, double>* dm, const Parallel_Orbitals& pv, const UnitCell& ucell, const LCAO_Orbitals& orb, @@ -38,8 +39,8 @@ void Force_LCAO>::cal_fvnl_dbeta(const elecstate::DensityMa ModuleBase::matrix& fvnl_dbeta, ModuleBase::matrix& svnl_dbeta) { - ModuleBase::TITLE("Force_LCAO_k", "cal_fvnl_dbeta_k_new"); - ModuleBase::timer::tick("Force_LCAO_k", "cal_fvnl_dbeta_k_new"); + ModuleBase::TITLE("Force_LCAO","cal_fvnl_dbeta"); + ModuleBase::timer::tick("Force_LCAO","cal_fvnl_dbeta"); const int nspin = GlobalV::NSPIN; const int nspin_DMR = (nspin == 2) ? 2 : 1; @@ -227,8 +228,10 @@ void Force_LCAO>::cal_fvnl_dbeta(const elecstate::DensityMa // check if this a adjacent atoms. bool is_adj = false; - if (distance < rcut) - is_adj = true; + if (distance < rcut) + { + is_adj = true; + } else if (distance >= rcut) { for (int ad0 = 0; ad0 < adjs.adj_num + 1; ++ad0) @@ -269,22 +272,25 @@ void Force_LCAO>::cal_fvnl_dbeta(const elecstate::DensityMa std::vector tmp_matrix_ptr; for (int is = 0; is < nspin_DMR; ++is) { - auto* tmp_base_matrix = DM->get_DMR_pointer(is+1)->find_matrix(iat1, iat2, rx2, ry2, rz2); + auto* tmp_base_matrix = dm->get_DMR_pointer(is+1)->find_matrix(iat1, iat2, rx2, ry2, rz2); tmp_matrix_ptr.push_back(tmp_base_matrix->get_pointer()); } - //hamilt::BaseMatrix* tmp_matrix = DM->get_DMR_pointer(1)->find_matrix(iat1, iat2, rx2, ry2, rz2); - //double* tmp_matrix_ptr = tmp_matrix->get_pointer(); + for (int ad0 = 0; ad0 < adjs.adj_num + 1; ++ad0) { const int T0 = adjs.ntype[ad0]; const int I0 = adjs.natom[ad0]; const int iat = ucell.itia2iat(T0, I0); - if (!iat_recorded && isforce) - adj_iat[ad0] = iat; + if (!iat_recorded && isforce) + { + adj_iat[ad0] = iat; + } // mohan add 2010-12-19 - if (ucell.infoNL.nproj[T0] == 0) - continue; + if (ucell.infoNL.nproj[T0] == 0) + { + continue; + } // const int I0 = gd.getNatom(ad0); // const int start0 = ucell.itiaiw2iwt(T0, I0, 0); @@ -645,7 +651,7 @@ void Force_LCAO>::cal_fvnl_dbeta(const elecstate::DensityMa StressTools::stress_fill(ucell.lat0, ucell.omega, svnl_dbeta); } - ModuleBase::timer::tick("Force_LCAO_k", "cal_fvnl_dbeta_k_new"); + ModuleBase::timer::tick("Force_LCAO","cal_fvnl_dbeta"); return; } diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/grid_init.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/grid_init.cpp index 69c51fef43..798b6cb20f 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/grid_init.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/grid_init.cpp @@ -1,4 +1,6 @@ #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h" +#include "module_hamilt_pw/hamilt_pwdft/global.h" +#include "module_base/global_variable.h" #include "module_base/parallel_reduce.h" #include "module_base/timer.h" diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp index 33a91ab455..bb586192fd 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp @@ -64,7 +64,6 @@ template HamiltLCAO::HamiltLCAO( Gint_Gamma* GG_in, Gint_k* GK_in, - LCAO_gen_fixedH* genH_in, LCAO_Matrix* LM_in, Local_Orbital_Charge* loc_in, elecstate::Potential* pot_in, diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h index 68059e37ab..87038d6af0 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h +++ b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h @@ -3,7 +3,6 @@ #include "module_elecstate/potentials/potential_new.h" #include "module_hamilt_general/hamilt.h" -#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.h" #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h" #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h" #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.h" @@ -27,7 +26,6 @@ class HamiltLCAO : public Hamilt */ HamiltLCAO(Gint_Gamma* GG_in, Gint_k* GK_in, - LCAO_gen_fixedH* genH_in, LCAO_Matrix* LM_in, Local_Orbital_Charge* loc_in, elecstate::Potential* pot_in, diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp index 316318bd9b..61565642ee 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp @@ -1,10 +1,10 @@ #include "spar_dh.h" +#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h" void sparse_format::cal_dH( LCAO_Matrix &lm, Grid_Driver &grid, const ORB_gen_tables* uot, - LCAO_gen_fixedH &gen_h, const int ¤t_spin, const double &sparse_thr, Gint_k &gint_k) @@ -27,7 +27,8 @@ void sparse_format::cal_dH( { GlobalV::CAL_STRESS = false; - gen_h.build_ST_new( + LCAO_domain::build_ST_new( + lm, 'T', true, GlobalC::ucell, @@ -41,7 +42,8 @@ void sparse_format::cal_dH( } else { - gen_h.build_ST_new( + LCAO_domain::build_ST_new( + lm, 'T', true, GlobalC::ucell, @@ -52,7 +54,8 @@ void sparse_format::cal_dH( lm.Hloc_fixedR.data()); } - gen_h.build_Nonlocal_mu_new( + LCAO_domain::build_Nonlocal_mu_new( + lm, lm.Hloc_fixed.data(), true, GlobalC::ucell, diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.h b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.h index c2583d0f63..e7e46e9094 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.h +++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.h @@ -12,7 +12,6 @@ namespace sparse_format LCAO_Matrix &lm, Grid_Driver &grid, const ORB_gen_tables* uot, - LCAO_gen_fixedH &gen_h, const int ¤t_spin, const double &sparse_thr, Gint_k &gint_k); diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.cpp index ffcb2f7d10..324eac3bf9 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.cpp @@ -2,6 +2,8 @@ #include "spar_st.h" #include "spar_dh.h" #include "spar_hsr.h" +#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h" +#include "module_hamilt_pw/hamilt_pwdft/global.h" // only for INPUT void sparse_format::cal_SR( const Parallel_Orbitals &pv, @@ -44,7 +46,6 @@ void sparse_format::cal_TR( LCAO_Matrix &lm, Grid_Driver &grid, const ORB_gen_tables* uot, - LCAO_gen_fixedH &gen_h, const double &sparse_thr) { ModuleBase::TITLE("sparse_format","cal_TR"); @@ -53,7 +54,16 @@ void sparse_format::cal_TR( lm.Hloc_fixedR.resize(lm.ParaV->nnr); lm.zeros_HSR('T'); - gen_h.build_ST_new('T', 0, ucell, GlobalC::ORB, pv, *uot, &(GlobalC::GridD), lm.Hloc_fixedR.data()); + LCAO_domain::build_ST_new( + lm, + 'T', + 0, + ucell, + GlobalC::ORB, + pv, + *uot, + &(GlobalC::GridD), + lm.Hloc_fixedR.data()); sparse_format::set_R_range(lm.all_R_coor, grid); diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.h b/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.h index 1e6fa0e7f0..63b32edd76 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.h +++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.h @@ -23,7 +23,6 @@ namespace sparse_format LCAO_Matrix &lm, Grid_Driver &grid, const ORB_gen_tables* uot, - LCAO_gen_fixedH &gen_h, const double &sparse_thr); //! cal_STN_R_for_T is only called by cal_TR diff --git a/source/module_io/output_mat_sparse.cpp b/source/module_io/output_mat_sparse.cpp index 06fcf4fd12..3acac0968a 100644 --- a/source/module_io/output_mat_sparse.cpp +++ b/source/module_io/output_mat_sparse.cpp @@ -15,7 +15,6 @@ namespace ModuleIO int istep, const ModuleBase::matrix &v_eff, const Parallel_Orbitals &pv, - LCAO_gen_fixedH &gen_h, // mohan add 2024-04-02 Gint_k &gint_k, // mohan add 2024-04-01 const ORB_gen_tables* uot, LCAO_Matrix &lm, @@ -29,7 +28,6 @@ namespace ModuleIO _istep(istep), _v_eff(v_eff), _pv(pv), - _gen_h(gen_h), // mohan add 2024-04-02 _gint_k(gint_k), // mohan add 2024-04-01 uot_(uot), _lm(lm), @@ -70,8 +68,7 @@ void Output_Mat_Sparse>::write(void) this->_pv, this->_lm, this->_grid, - uot_, - this->_gen_h); // LiuXh add 2019-07-15 + uot_); // LiuXh add 2019-07-15 } //! generate a file containing the derivatives of the Hamiltonian matrix (in Ry/Bohr) @@ -80,7 +77,6 @@ void Output_Mat_Sparse>::write(void) output_dHR( _istep, this->_v_eff, - this->_gen_h, this->_gint_k, // mohan add 2024-04-01 this->_lm, this->_grid, // mohan add 2024-04-06 diff --git a/source/module_io/output_mat_sparse.h b/source/module_io/output_mat_sparse.h index 0fbe432742..8dcd7083d4 100644 --- a/source/module_io/output_mat_sparse.h +++ b/source/module_io/output_mat_sparse.h @@ -2,7 +2,6 @@ #define OUTPUT_MAT_SPARSE_H #include "module_basis/module_ao/parallel_orbitals.h" -#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.h" #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h" #include "module_hamilt_lcao/module_gint/gint_k.h" #include "module_hsolver/hsolver_lcao.h" @@ -21,7 +20,6 @@ class Output_Mat_Sparse int istep, const ModuleBase::matrix& v_eff, const Parallel_Orbitals& pv, - LCAO_gen_fixedH& gen_h, // mohan add 2024-04-02 Gint_k& gint_k, // mohan add 2024-04-01 const ORB_gen_tables* uot, LCAO_Matrix& lm, @@ -50,8 +48,6 @@ class Output_Mat_Sparse const Parallel_Orbitals& _pv; - LCAO_gen_fixedH& _gen_h; // mohan add 2024-04-02 - Gint_k& _gint_k; // mohan add 2024-04-01 // introduced temporarily for LCAO refactoring diff --git a/source/module_io/td_current_io.cpp b/source/module_io/td_current_io.cpp index 805fed213d..167c0d92e6 100644 --- a/source/module_io/td_current_io.cpp +++ b/source/module_io/td_current_io.cpp @@ -1,4 +1,5 @@ #include "td_current_io.h" +#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h" #include "module_base/global_function.h" #include "module_base/global_variable.h" @@ -16,7 +17,6 @@ void ModuleIO::Init_DS_tmp( const Parallel_Orbitals& pv, LCAO_Matrix &lm, - LCAO_gen_fixedH &gen_h, const ORB_gen_tables* uot) { ModuleBase::TITLE("ModuleIO", "Init_DS_tmp"); @@ -36,7 +36,8 @@ void ModuleIO::Init_DS_tmp( ModuleBase::OMP_PARALLEL(init_DSloc_Rxyz); bool cal_deri = true; - gen_h.build_ST_new( + LCAO_domain::build_ST_new( + lm, 'S', cal_deri, GlobalC::ucell, @@ -154,14 +155,13 @@ void ModuleIO::write_current(const int istep, const ORB_gen_tables* uot, const Parallel_Orbitals* pv, Record_adj& ra, - LCAO_Matrix &lm, // mohan add 2024-04-02 - LCAO_gen_fixedH &gen_h) // mohan add 2024-04-02 + LCAO_Matrix &lm) // mohan add 2024-04-02 { ModuleBase::TITLE("ModuleIO", "write_current"); ModuleBase::timer::tick("ModuleIO", "write_current"); //Init_DS_tmp - Init_DS_tmp(*pv, lm, gen_h, uot); + Init_DS_tmp(*pv, lm, uot); // construct a DensityMatrix object elecstate::DensityMatrix, double> DM(&kv,pv,GlobalV::NSPIN); diff --git a/source/module_io/td_current_io.h b/source/module_io/td_current_io.h index 228d8f32f4..253db5cb14 100644 --- a/source/module_io/td_current_io.h +++ b/source/module_io/td_current_io.h @@ -2,7 +2,6 @@ #define TD_CURRENT_H #include "module_elecstate/module_dm/density_matrix.h" -#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.h" #include "module_elecstate/elecstate_lcao.h" #include "module_psi/psi.h" @@ -17,8 +16,7 @@ void write_current(const int istep, const ORB_gen_tables* uot, const Parallel_Orbitals* pv, Record_adj& ra, - LCAO_Matrix &lm, // mohan add 2024-04-02 - LCAO_gen_fixedH &gen_h); // mohan add 2024-04-02 + LCAO_Matrix &lm); // mohan add 2024-04-02 /// @brief calculate sum_n[𝜌_(𝑛𝑘,𝜇𝜈)] for current calculation void cal_tmp_DM(elecstate::DensityMatrix, double>& DM, const int ik, const int nspin); @@ -27,7 +25,6 @@ void cal_tmp_DM(elecstate::DensityMatrix, double>& DM, cons void Init_DS_tmp( const Parallel_Orbitals& pv, LCAO_Matrix &lm, - LCAO_gen_fixedH &gen_h, const ORB_gen_tables* uot); /// @brief DS_locR will be initialized again in force calculation, so it must be destoryed here. diff --git a/source/module_io/write_HS_R.cpp b/source/module_io/write_HS_R.cpp index 17fbdb39ea..62834a7c47 100644 --- a/source/module_io/write_HS_R.cpp +++ b/source/module_io/write_HS_R.cpp @@ -91,7 +91,6 @@ void ModuleIO::output_HSR(const int& istep, void ModuleIO::output_dHR(const int &istep, const ModuleBase::matrix &v_eff, - LCAO_gen_fixedH &gen_h, // mohan add 2024-04-02 Gint_k &gint_k, // mohan add 2024-04-01 LCAO_Matrix &lm, // mohan add 2024-04-01 Grid_Driver &grid, // mohan add 2024-04-06 @@ -118,7 +117,6 @@ void ModuleIO::output_dHR(const int &istep, lm, grid, uot, - gen_h, cspin, sparse_thr, gint_k); @@ -144,7 +142,6 @@ void ModuleIO::output_dHR(const int &istep, lm, grid, uot, - gen_h, cspin, sparse_thr, gint_k); @@ -208,7 +205,6 @@ void ModuleIO::output_TR( LCAO_Matrix &lm, Grid_Driver &grid, const ORB_gen_tables* uot, - LCAO_gen_fixedH &gen_h, // mohan add 2024-04-02 const std::string &TR_filename, const bool &binary, const double &sparse_thr @@ -233,7 +229,6 @@ void ModuleIO::output_TR( lm, grid, uot, - gen_h, sparse_thr); ModuleIO::save_sparse( diff --git a/source/module_io/write_HS_R.h b/source/module_io/write_HS_R.h index 020d012be1..479a7e25b0 100644 --- a/source/module_io/write_HS_R.h +++ b/source/module_io/write_HS_R.h @@ -5,8 +5,8 @@ #include "module_cell/klist.h" #include "module_hamilt_general/hamilt.h" #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h" -#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.h" #include "module_hamilt_lcao/module_gint/gint_k.h" +#include "module_hamilt_pw/hamilt_pwdft/global.h" namespace ModuleIO { @@ -27,7 +27,6 @@ namespace ModuleIO void output_dHR( const int &istep, const ModuleBase::matrix& v_eff, - LCAO_gen_fixedH& gen_h, // mohan add 2024-04-02 Gint_k& gint_k, // mohan add 2024-04-01 LCAO_Matrix &lm, // mohan add 2024-04-01 Grid_Driver &grid, // mohan add 2024-04-06 @@ -43,7 +42,6 @@ namespace ModuleIO LCAO_Matrix &lm, Grid_Driver &grid, const ORB_gen_tables* uot, - LCAO_gen_fixedH &gen_h, // mohan add 2024-04-02 const std::string& TR_filename = "data-TR-sparse_SPIN0.csr", const bool& binary = false, const double& sparse_threshold = 1e-10); diff --git a/source/module_io/write_dos_lcao.cpp b/source/module_io/write_dos_lcao.cpp index 2c1938b7c5..6197894b1d 100644 --- a/source/module_io/write_dos_lcao.cpp +++ b/source/module_io/write_dos_lcao.cpp @@ -16,7 +16,6 @@ #ifdef __LCAO #include "module_cell/module_neighbor/sltk_atom_arrange.h" //qifeng-2019-01-21 #include "module_cell/module_neighbor/sltk_grid_driver.h" -#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.h" #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h" #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h" #include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"