diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h index c3e7ae3a2d..a3637fc64d 100644 --- a/source/module_elecstate/elecstate_lcao.h +++ b/source/module_elecstate/elecstate_lcao.h @@ -16,13 +16,23 @@ class ElecStateLCAO : public ElecState { public: ElecStateLCAO(){} // will be called by ElecStateLCAO_TDDFT + /* + Note: on the removal of LOWF + The entire instance of Local_Orbital_wfc is not really necessary, because + this class only need it to do 2dbcd wavefunction gathering. Therefore, + what is critical is the 2dbcd handle which stores information about the + wavefunction, and another free function to do the 2dbcd gathering. + + A future work would be replace the Local_Orbital_wfc with a 2dbcd handle. + A free gathering function will also be needed. + */ ElecStateLCAO(Charge* chg_in , const K_Vectors* klist_in , int nks_in, Local_Orbital_Charge* loc_in , Gint_Gamma* gint_gamma_in, //mohan add 2024-04-01 Gint_k* gint_k_in, //mohan add 2024-04-01 - Local_Orbital_wfc* lowf_in , + Local_Orbital_wfc* lowf_in, ModulePW::PW_Basis* rhopw_in , ModulePW::PW_Basis_Big* bigpw_in ) { diff --git a/source/module_elecstate/elecstate_lcao_tddft.cpp b/source/module_elecstate/elecstate_lcao_tddft.cpp index c4c1e21e19..a835fceb40 100644 --- a/source/module_elecstate/elecstate_lcao_tddft.cpp +++ b/source/module_elecstate/elecstate_lcao_tddft.cpp @@ -51,7 +51,6 @@ void ElecStateLCAO_TDDFT::psiToRho_td(const psi::Psi>& psi) } } - //this->loc->cal_dk_k(*this->lowf->gridt, this->wg, *(this->klist)); for (int is = 0; is < GlobalV::NSPIN; is++) { ModuleBase::GlobalFunc::ZEROS(this->charge->rho[is], this->charge->nrxx); // mohan 2009-11-10 diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp index bdfbbd93ad..f80b85265d 100644 --- a/source/module_esolver/esolver_ks_lcao.cpp +++ b/source/module_esolver/esolver_ks_lcao.cpp @@ -163,10 +163,26 @@ void ESolver_KS_LCAO::before_all_runners(Input& inp, UnitCell& ucell) this->gen_h.LM = &this->LM; //! pass basis-pointer to EState and Psi - this->LOC.ParaV = this->LOWF.ParaV = this->LM.ParaV = &(this->orb_con.ParaV); + /* + Inform: on getting rid of ORB_control and Parallel_Orbitals + + Have to say it is all the stories start, the ORB_control instance pass its Parallel_Orbitals instance to + Local_Orbital_Charge, Local_Orbital_Wfc and LCAO_Matrix, which is actually for getting information + of 2D block-cyclic distribution. + + To remove LOC, LOWF and LM use in functions, one must make sure there is no more information imported + to those classes. Then places where to get information from them can be substituted to orb_con + + Plan: + 1. Specifically for paraV, the thing to do first is to replace the use of ParaV to the oen of ORB_control. + Then remove ORB_control and place paraV somewhere. + */ + this->LOC.ParaV = &(this->orb_con.ParaV); + this->LOWF.ParaV = &(this->orb_con.ParaV); + this->LM.ParaV = &(this->orb_con.ParaV); // 5) initialize Density Matrix - dynamic_cast*>(this->pelec)->init_DM(&this->kv, this->LM.ParaV, GlobalV::NSPIN); + dynamic_cast*>(this->pelec)->init_DM(&this->kv, &(this->orb_con.ParaV), GlobalV::NSPIN); if (GlobalV::CALCULATION == "get_S") @@ -232,7 +248,7 @@ void ESolver_KS_LCAO::before_all_runners(Input& inp, UnitCell& ucell) // 10) init HSolver if (this->phsol == nullptr) { - this->phsol = new hsolver::HSolverLCAO(this->LOWF.ParaV); + this->phsol = new hsolver::HSolverLCAO(&(this->orb_con.ParaV)); this->phsol->method = GlobalV::KS_SOLVER; } @@ -286,7 +302,14 @@ void ESolver_KS_LCAO::init_after_vc(Input& inp, UnitCell& ucell) ModuleBase::timer::tick("ESolver_KS_LCAO", "init_after_vc"); ESolver_KS::init_after_vc(inp, ucell); - + /* + Notes: on the removal of LOWF + Following constructor of ElecStateLCAO requires LOWF. However, ElecState only need + LOWF to do wavefunction 2dbcd (2D BlockCyclicDistribution) gathering. So, a free + function is needed to replace the use of LOWF. The function indeed needs the information + about 2dbcd, therefore another instance storing the information is needed instead. + Then that instance will be the input of "the free function to gather". + */ if (GlobalV::md_prec_level == 2) { delete this->pelec; @@ -296,7 +319,7 @@ void ESolver_KS_LCAO::init_after_vc(Input& inp, UnitCell& ucell) &(this->LOC), &(this->GG), // mohan add 2024-04-01 &(this->GK), // mohan add 2024-04-01 - &(this->LOWF), + &(this->LOWF), // should be replaced by a 2dbcd handle, if insist the "print_psi" must be in ElecState class this->pw_rho, this->pw_big); @@ -657,10 +680,10 @@ void ESolver_KS_LCAO::iter_init(const int istep, const int iter) // first need to calculate the weight according to // electrons number. if (istep == 0 - && this->wf.init_wfc == "file" - && this->LOWF.error == 0) - { - if (iter == 1) + && this->wf.init_wfc == "file" // Note: on the removal of LOWF + && this->LOWF.error == 0) // this means the wavefunction is read without any error. + { // However the I/O of wavefunction are nonsence to be implmented in different places. + if (iter == 1) // once the reading of wavefunction has any error, should exit immediately. { std::cout << " WAVEFUN -> CHARGE " << std::endl; @@ -831,11 +854,11 @@ void ESolver_KS_LCAO::hamilt2density(int istep, int iter, double ethr) #ifdef __EXX if (GlobalC::exx_info.info_ri.real_number) { - this->exd->exx_hamilt2density(*this->pelec, *this->LOWF.ParaV, iter); + this->exd->exx_hamilt2density(*this->pelec, this->orb_con.ParaV, iter); } else { - this->exc->exx_hamilt2density(*this->pelec, *this->LOWF.ParaV, iter); + this->exc->exx_hamilt2density(*this->pelec, this->orb_con.ParaV, iter); } #endif @@ -861,8 +884,6 @@ void ESolver_KS_LCAO::hamilt2density(int istep, int iter, double ethr) #ifdef __DEEPKS if (GlobalV::deepks_scf) { - const Parallel_Orbitals* pv = this->LOWF.ParaV; - const std::vector>& dm = dynamic_cast*>(this->pelec)->get_DM()->get_DMK_vector(); @@ -940,7 +961,7 @@ void ESolver_KS_LCAO::update_pot(const int istep, const int iter) GlobalV::out_app_flag, "H", "data-" + std::to_string(ik), - *this->LOWF.ParaV, + this->orb_con.ParaV, GlobalV::DRANK); ModuleIO::save_mat(istep, s_mat.p, @@ -951,7 +972,7 @@ void ESolver_KS_LCAO::update_pot(const int istep, const int iter) GlobalV::out_app_flag, "S", "data-" + std::to_string(ik), - *this->LOWF.ParaV, + this->orb_con.ParaV, GlobalV::DRANK); } } @@ -1048,7 +1069,7 @@ void ESolver_KS_LCAO::iter_finish(int iter) if (GlobalC::restart.info_save.save_H && two_level_step > 0 && (!GlobalC::exx_info.info_global.separate_loop || iter == 1)) // to avoid saving the same value repeatedly { - std::vector Hexxk_save(this->LOWF.ParaV->get_local_size()); + std::vector Hexxk_save(this->orb_con.ParaV.get_local_size()); for (int ik = 0;ik < this->kv.nks;++ik) { ModuleBase::GlobalFunc::ZEROS(Hexxk_save.data(), Hexxk_save.size()); @@ -1057,7 +1078,7 @@ void ESolver_KS_LCAO::iter_finish(int iter) opexx_save.contributeHk(ik); - GlobalC::restart.save_disk("Hexx", ik, this->LOWF.ParaV->get_local_size(), Hexxk_save.data()); + GlobalC::restart.save_disk("Hexx", ik, this->orb_con.ParaV.get_local_size(), Hexxk_save.data()); } if (GlobalV::MY_RANK == 0) { @@ -1234,7 +1255,7 @@ void ESolver_KS_LCAO::after_scf(const int istep) GlobalC::ucell, GlobalC::ORB, GlobalC::GridD, - this->LOWF.ParaV, + &(this->orb_con.ParaV), *(this->psi), dynamic_cast*>(this->pelec)->get_DM()); @@ -1251,7 +1272,7 @@ void ESolver_KS_LCAO::after_scf(const int istep) RPA_LRI rpa_lri_double(GlobalC::exx_info.info_ri); rpa_lri_double.cal_postSCF_exx(*dynamic_cast*>(this->pelec)->get_DM(), MPI_COMM_WORLD, this->kv); rpa_lri_double.init(MPI_COMM_WORLD, this->kv); - rpa_lri_double.out_for_RPA(*(this->LOWF.ParaV), *(this->psi), this->pelec); + rpa_lri_double.out_for_RPA(this->orb_con.paraV, *(this->psi), this->pelec); } #endif @@ -1418,7 +1439,7 @@ ModuleIO::Output_Mat_Sparse ESolver_KS_LCAO::create_Output_Mat_Spars INPUT.out_mat_r, istep, this->pelec->pot->get_effective_v(), - *this->LOWF.ParaV, + this->orb_con.ParaV, this->gen_h, // mohan add 2024-04-06 this->GK, // mohan add 2024-04-01 this->LM, diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp index 263520e5f1..b703ce6ed7 100644 --- a/source/module_esolver/esolver_ks_lcao_elec.cpp +++ b/source/module_esolver/esolver_ks_lcao_elec.cpp @@ -109,23 +109,23 @@ void ESolver_KS_LCAO::beforesolver(const int istep) if (GlobalV::GAMMA_ONLY_LOCAL) { nsk = GlobalV::NSPIN; - ncol = this->LOWF.ParaV->ncol_bands; + ncol = this->orb_con.ParaV.ncol_bands; if (GlobalV::KS_SOLVER == "genelpa" || GlobalV::KS_SOLVER == "lapack_gvx" || GlobalV::KS_SOLVER=="pexsi" || GlobalV::KS_SOLVER == "cusolver") { - ncol = this->LOWF.ParaV->ncol; + ncol = this->orb_con.ParaV.ncol; } } else { nsk = this->kv.nks; #ifdef __MPI - ncol = this->LOWF.ParaV->ncol_bands; + ncol = this->orb_con.ParaV.ncol_bands; #else ncol = GlobalV::NBANDS; #endif } - this->psi = new psi::Psi(nsk, ncol, this->LOWF.ParaV->nrow, nullptr); + this->psi = new psi::Psi(nsk, ncol, this->orb_con.ParaV.nrow, nullptr); } // prepare grid in Gint @@ -574,7 +574,6 @@ void ESolver_KS_LCAO::nscf(void) // Peize Lin add 2018-08-14 if (GlobalC::exx_info.info_global.cal_exx) { - // GlobalC::exx_lcao.cal_exx_elec_nscf(this->LOWF.ParaV[0]); const std::string file_name_exx = GlobalV::global_out_dir + "HexxR" + std::to_string(GlobalV::MY_RANK); if (GlobalC::exx_info.info_ri.real_number) { @@ -670,7 +669,7 @@ void ESolver_KS_LCAO::nscf(void) this->sf, this->kv, this->psi, - this->LOWF.ParaV); + &(this->orb_con.ParaV)); } else if (INPUT.wannier_method == 2) { @@ -682,7 +681,7 @@ void ESolver_KS_LCAO::nscf(void) INPUT.nnkpfile, INPUT.wannier_spin); - myWannier.calculate(this->pelec->ekb, this->kv, *(this->psi), this->LOWF.ParaV); + myWannier.calculate(this->pelec->ekb, this->kv, *(this->psi), &(this->orb_con.ParaV)); } #endif } @@ -696,7 +695,6 @@ void ESolver_KS_LCAO::nscf(void) // below is for DeePKS NSCF calculation #ifdef __DEEPKS - const Parallel_Orbitals* pv = this->LOWF.ParaV; if (GlobalV::deepks_out_labels || GlobalV::deepks_scf) { const elecstate::DensityMatrix* dm diff --git a/source/module_esolver/esolver_ks_lcao_tddft.cpp b/source/module_esolver/esolver_ks_lcao_tddft.cpp index 995e5b5e08..8eafdab3c6 100644 --- a/source/module_esolver/esolver_ks_lcao_tddft.cpp +++ b/source/module_esolver/esolver_ks_lcao_tddft.cpp @@ -102,7 +102,8 @@ void ESolver_KS_LCAO_TDDFT::before_all_runners(Input& inp, UnitCell& ucell) // pass Hamilt-pointer to Operator this->gen_h.LM = &this->LM; // pass basis-pointer to EState and Psi - this->LOC.ParaV = this->LOWF.ParaV = this->LM.ParaV; + this->LOC.ParaV = this->LM.ParaV;; + this->LOWF.ParaV = this->LM.ParaV; // init DensityMatrix dynamic_cast>*>(this->pelec) @@ -111,7 +112,7 @@ void ESolver_KS_LCAO_TDDFT::before_all_runners(Input& inp, UnitCell& ucell) // init Psi, HSolver, ElecState, Hamilt if (this->phsol == nullptr) { - this->phsol = new hsolver::HSolverLCAO>(this->LOWF.ParaV); + this->phsol = new hsolver::HSolverLCAO>(this->LM.ParaV); this->phsol->method = GlobalV::KS_SOLVER; } @@ -274,7 +275,7 @@ void ESolver_KS_LCAO_TDDFT::update_pot(const int istep, const int iter) GlobalV::out_app_flag, "H", "data-" + std::to_string(ik), - *this->LOWF.ParaV, + *this->LM.ParaV, GlobalV::DRANK); ModuleIO::save_mat(istep, @@ -286,7 +287,7 @@ void ESolver_KS_LCAO_TDDFT::update_pot(const int istep, const int iter) GlobalV::out_app_flag, "S", "data-" + std::to_string(ik), - *this->LOWF.ParaV, + *this->LM.ParaV, GlobalV::DRANK); } } @@ -333,8 +334,8 @@ void ESolver_KS_LCAO_TDDFT::update_pot(const int istep, const int iter) { #ifdef __MPI this->psi_laststep = new psi::Psi>(kv.nks, - this->LOWF.ParaV->ncol_bands, - this->LOWF.ParaV->nrow, + this->LM.ParaV->ncol_bands, + this->LM.ParaV->nrow, nullptr); #else this->psi_laststep = new psi::Psi>(kv.nks, GlobalV::NBANDS, GlobalV::NLOCAL, nullptr); diff --git a/source/module_esolver/io_npz.cpp b/source/module_esolver/io_npz.cpp index 8a557884fe..9c58bd7b0b 100644 --- a/source/module_esolver/io_npz.cpp +++ b/source/module_esolver/io_npz.cpp @@ -33,7 +33,7 @@ void ESolver_KS_LCAO::read_mat_npz(std::string& zipname, hamilt::HContai { ModuleBase::TITLE("LCAO_Hamilt","read_mat_npz"); - const Parallel_Orbitals* paraV = this->LOWF.ParaV; + const Parallel_Orbitals* paraV = &(this->orb_con.ParaV); #ifdef __USECNPY diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.cpp index 4596a00b13..fa651b1509 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.cpp @@ -447,683 +447,3 @@ void Local_Orbital_wfc::wfc_2d_to_grid(const int istep, ModuleBase::timer::tick("Local_Orbital_wfc","wfc_2d_to_grid"); } #endif - -/** - * @brief Temporary namespace for Scalapack 2D Block-Cyclic-Distribution (2D-BCD) strategy, storing detached functions from Local_Orbital_wfc and related classes. (The most important) parts of them will be moved to interface-to-ScaLAPACK class in the future, and the rest will be moved elsewhere. - * - */ -namespace ScaLAPACK2DBCD{ -// standalone python style string split function -/** - * @brief split a string by a delimiter - * - * @param src string to be split - * @param delimiter delimiter - * @param dest vector to store the splitted strings - */ -void split(const std::string& src, const char& delimiter, std::vector& dest) -{ - std::string str = src; - std::string substring; - std::string::size_type start = 0, index; - do - { - index = str.find_first_of(delimiter, start); - if (index != std::string::npos) - { - substring = str.substr(start, index - start); - dest.push_back(substring); - start = str.find_first_not_of(delimiter, index); - if (start == std::string::npos) return; - } - } while (index != std::string::npos); - // the last token - substring = str.substr(start); - dest.push_back(substring); -} -// standalone python style string startswith function -/** - * @brief check if a string starts with a prefix - * - * @param src string to be checked - * @param prefix prefix - * @return true - * @return false - */ -bool startswith(const std::string& src, const std::string& prefix) -{ - return src.find(prefix) == 0; -} -// standalone python style string endswith function -/** - * @brief check if a string ends with a suffix - * - * @param src string to be checked - * @param suffix suffix - * @return true - * @return false - */ -bool endswith(const std::string& src, const std::string& suffix) -{ - return src.rfind(suffix) == src.size() - suffix.size(); -} -/** - * @brief cast a string to a vector of T, the string should be in the format of "real imag real imag ..." - * - * @tparam T - * @param src string to be cast - * @param dest vector to store the casted values - * @param delimiter delimiter between real and imag parts - */ -template -void cast_to_stdcomplex(const std::string& src, std::vector& dest, const char& delimiter = ' ') -{ - // there may be parentheses in the string - // so we need to remove them first - std::string str = src; - str.erase(std::remove(str.begin(), str.end(), '('), str.end()); - str.erase(std::remove(str.begin(), str.end(), ')'), str.end()); - std::vector tokens; - split(str, delimiter, tokens); - for (const auto& token : tokens) - { - dest.push_back(std::stod(token)); - } -} - -#include -#include -// read file named as LOWF_K_*.txt, the name should not be hard-coded -// structure of LOWF_K_*.txt: -// [ik] (index of k points) -// [xk] [yk] [zk] -// [nb] (number of bands) -// [nlocal] (number of orbitals) -// [ib] (band) -// [energy] (energy of the band) -// [occ] (Occupations) -// [real] [imag] [real] [imag] ... (wavefunction coefficients) -// ... -// [ib] (band) -// [energy] (energy of the band) -// [occ] (Occupations) -// [real] [imag] [real] [imag] ... (wavefunction coefficients) -// ... -/** - * @brief read ABACUS output wavefunction file named as LOWF_K_*.txt for std::complex wavefunction coefficients - * - * @param flowf file name under convention of LOWF_K_*.txt - * @param ik index of k points, will be returned - * @param kvec_c k vector in Cartesian coordinates, will be returned - * @param nbands number of bands, will be returned - * @param nlocal number of orbitals, will be returned - * @param energies energies of bands, will be returned - * @param occs occupations, will be returned - * @param lowf wavefunction coefficients, will be returned. Note! 1D array of complex numbers - */ -void read_abacus_lowf(const std::string& flowf, //<[in] LOWF_K_*.txt - int& ik, //<[out] index of k points - std::vector& kvec_c, //<[out] k vector - int& nbands, //<[out] number of bands - int& nlocal, //<[out] number of orbitals - std::vector& energies, //<[out] energies of bands - std::vector& occs, //<[out] occupations - std::vector>& lowf) //<[out] wavefunction coefficients -{ - std::ifstream ifs(flowf.c_str()); - if(!ifs) ModuleBase::WARNING_QUIT("read_abacus_lowf", "open file failed: " + flowf); - // will use line-by-line parse - std::string line; - bool read_kvec = false; - int iband = 0; - int ilocal = 0; - while (std::getline(ifs, line)) - { - // remove leading and trailing whitespaces - line = std::regex_replace(line, std::regex("^ +| +$|( ) +"), "$1"); - if(endswith(line, "(index of k points)")) - { - std::vector result; - split(line, ' ', result); - ik = std::stoi(result[0]); - read_kvec = true; - continue; - } - if(read_kvec) - { - std::vector result; - split(line, ' ', result); - for (const auto& token : result) - { - kvec_c.push_back(std::stod(token)); - } - read_kvec = false; - continue; - } - if(endswith(line, "(number of bands)")) - { - std::vector result; - split(line, ' ', result); - nbands = std::stoi(result[0]); - } - else if(endswith(line, "(number of orbitals)")) - { - std::vector result; - split(line, ' ', result); - nlocal = std::stoi(result[0]); - } - else if(endswith(line, "(band)")) - { - std::vector result; - split(line, ' ', result); - #ifdef __DEBUG - assert (ilocal == 0)||(ilocal == nlocal); - #endif - iband = std::stoi(result[0]); - ilocal = 0; // reset ilocal - } - else if(endswith(line, "(Ry)")) - { - std::vector result; - split(line, ' ', result); - energies.push_back(std::stod(result[0])); - } - else if(endswith(line, "(Occupations)")) - { - std::vector result; - split(line, ' ', result); - occs.push_back(std::stod(result[0])); - #ifdef __DEBUG - assert (ilocal == 0); - #endif - } - else// read wavefunction coefficients - { - std::vector result; - split(line, ' ', result); - for (const auto& token : result) - { - std::vector temp; - cast_to_stdcomplex(token, temp, ' '); - for (int i = 0; i < temp.size(); i += 2) - { - lowf.push_back(std::complex(temp[i], temp[i + 1])); - } - ilocal += temp.size() / 2; - } - } - } - #ifdef __DEBUG - assert (lowf.size() == nbands * nlocal); - assert (iband == nbands); - assert (ilocal == nlocal); - #endif -} -// overload for the case of double wavefunction coefficients -/** - * @brief double overloaded version of read_abacus_lowf, read wavefunction coefficients as double - * - * @param flowf file name under convention of LOWF_K_*.txt - * @param ik index of k points, will be returned - * @param kvec_c k vector in Cartesian coordinates, will be returned - * @param nbands number of bands, will be returned - * @param nlocal number of orbitals, will be returned - * @param energies energies of bands, will be returned - * @param occs occupations, will be returned - * @param lowf wavefunction coefficients, will be returned. Note! 1D array of complex numbers - */ -void read_abacus_lowf(const std::string& flowf, //<[in] LOWF_K_*.txt - int& ik, //<[out] index of k points - std::vector& kvec_c, //<[out] k vector - int& nbands, //<[out] number of bands - int& nlocal, //<[out] number of orbitals - std::vector& energies, //<[out] energies of bands - std::vector& occs, //<[out] occupations - std::vector& lowf) //<[out] wavefunction coefficients -{ - std::ifstream ifs(flowf.c_str()); - if(!ifs) ModuleBase::WARNING_QUIT("read_abacus_lowf", "open file failed: " + flowf); - // will use line-by-line parse - std::string line; - bool read_kvec = false; - int iband = 0; - int ilocal = 0; - while (std::getline(ifs, line)) - { - // remove leading and trailing whitespaces - line = std::regex_replace(line, std::regex("^ +| +$|( ) +"), "$1"); - if(endswith(line, "(index of k points)")) - { - std::vector result; - split(line, ' ', result); - ik = std::stoi(result[0]); - read_kvec = true; - continue; - } - if(read_kvec) - { - std::vector result; - split(line, ' ', result); - for (const auto& token : result) - { - kvec_c.push_back(std::stod(token)); - } - read_kvec = false; - continue; - } - if(endswith(line, "(number of bands)")) - { - std::vector result; - split(line, ' ', result); - nbands = std::stoi(result[0]); - } - else if(endswith(line, "(number of orbitals)")) - { - std::vector result; - split(line, ' ', result); - nlocal = std::stoi(result[0]); - } - else if(endswith(line, "(band)")) - { - std::vector result; - split(line, ' ', result); - #ifdef __DEBUG - assert (ilocal == 0)||(ilocal == nlocal); - #endif - iband = std::stoi(result[0]); - ilocal = 0; // reset ilocal - } - else if(endswith(line, "(Ry)")) - { - std::vector result; - split(line, ' ', result); - energies.push_back(std::stod(result[0])); - } - else if(endswith(line, "(Occupations)")) - { - std::vector result; - split(line, ' ', result); - occs.push_back(std::stod(result[0])); - #ifdef __DEBUG - assert (ilocal == 0); - #endif - } - else// read wavefunction coefficients - { - std::vector result; - split(line, ' ', result); - for (const auto& token : result) - { - lowf.push_back(std::stod(token)); - ilocal += 1; - } - } - } - #ifdef __DEBUG - assert (lowf.size() == nbands * nlocal); - assert (iband == nbands); - assert (ilocal == nlocal); - #endif -} - -/** - * @brief Developer notes on Scalapack 2D Block-Cyclic-Distribution (2D-BCD) strategy - * One matrix should first be divided into blocks with size MB*NB, then blocks will be distributed to different processors (onto processor grid). - * A schematic view of 2D-BCD is shown below. Firstly a matrix is like: - * - * 0 1 2 3 4 5 6 7 8 ... - * 0 a11 a12 a13 a14 a15 a16 a17 a18 a19 ... - * 1 a21 a22 a23 a24 a25 a26 a27 a28 a29 ... - * 2 a31 a32 a33 a34 a35 a36 a37 a38 a39 ... - * 3 a41 a42 a43 a44 a45 a46 a47 a48 a49 ... - * 4 a51 a52 a53 a54 a55 a56 a57 a58 a59 ... - * 5 a61 a62 a63 a64 a65 a66 a67 a68 a69 ... - * 6 a71 a72 a73 a74 a75 a76 a77 a78 a79 ... - * ... - * But remember it is stored in a 1D array in memory, so all elements are stored in a continuous manner. This means a11 -> a0, ...: - * 0 1 2 3 4 5 6 7 8 ... - * 0 a0 a1 a2 a3 a4 a5 a6 a7 a8 ... - * 1 an an+1 ... ... ... ... ... ... ... - * - * if MB=2, NB=3, then the matrix is divided into 2*3 blocks: - * | 0 1 2 | 3 4 5 | 6 7 8 | ... - * -+-------------+-------------+-------------+ ... - * 0| a11 a12 a13 | a14 a15 a16 | a17 a18 a19 | ... - * 1| a21 a22 a23 | a24 a25 a26 | a27 a28 a29 | ... - * -+-------------+-------------+-------------+ ... - * 2| a31 a32 a33 | a34 a35 a36 | a37 a38 a39 | ... - * 3| a41 a42 a43 | a44 a45 a46 | a47 a48 a49 | ... - * -+-------------+-------------+-------------+ ... - * 4| a51 a52 a53 | a54 a55 a56 | a57 a58 a59 | ... - * 5| a61 a62 a63 | a64 a65 a66 | a67 a68 a69 | ... - * -+-------------+-------------+-------------+ ... - * 6| a71 a72 a73 | a74 a75 a76 | a77 a78 a79 | ... - * ... - * - * if there are 4 processors, then the blocks will be distributed to the processor grid: - * - * +-------------+-------------+-------------+ ... - * | Processor 0 | Processor 1 | Processor 0 | ... - * | (0, 0) | (0, 1) | (0, 0) | ... - * +-------------+-------------+-------------+ ... - * | Processor 2 | Processor 3 | Processor 2 | ... - * | (1, 0) | (1, 1) | (1, 0) | ... - * +-------------+-------------+-------------+ ... - * | Processor 0 | Processor 1 | Processor 0 | ... - * | (0, 0) | (0, 1) | (0, 0) | ... - * +-------------+-------------+-------------+ ... - * | Processor 2 | Processor 3 | Processor 2 | ... - * ... - * - * The processor grid is a 2D grid with size Pr*Pc, if there are 20 blocks in one line and there are only 4 processors, then from 0th to 19th block, they will correspond to processor 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3. - * Additionaly, present Scalapack requires continuous memory allocation of blocks, therefore, the blocks should be allocated in a continuous manner. - * - * For example on Processor 0, the distributed matrix should be like: - * - * a11 a12 a13 | a17 a18 a19 ... - * a21 a22 a23 | a27 a28 a29 ... - * ------------+------------ ... - * a51 a52 a53 | a57 a58 a59 ... - * a61 a62 a63 | a67 a68 a69 ... - * ... - * - * or say it will be locally indiced as: - * +-------------+-------------+... +-------------+-------------+... - * | Block 0 | Block 2 |... | Block 0 | Block 1 |... - * | | |... | | |... - * +-------------+-------------+... +-------------+-------------+... - * | Block 2n | Block 2n+2 |... | Block n | Block n+1 |... - * | | |... | | |... - * +-------------+-------------+... +-------------+-------------+... - * ... - * - * Moreover it should be like (1D flatten, row-major, continuous memory allocation, leading dimension specified to let it be 2D matrix): - * a11 a12 a13 a17 a18 a19 ... a21 a22 a23 a27 a28 a29 ... a51 a52 a53 a57 a58 a59 ... - * they will be locally indiced as: - * a1 a2 a3 a4 a5 a6 ... - * - * The following two functions are for converting between global index and local index. - */ - -/** - * @brief Find global index of a matrix element (index before 2DBCD) from local index (index after 2DBCD) and processor index. (2DBCD: 2D Block-Cyclic-Distribution) - * - * @param ixx_local [in] index of matrix element on one processor (across blocks distributed on it) - * @param iproc [in] index of processor - * @param ixx_global [out] global index of matrix element - * @param nxx_block_local [in] number of "something" per block per processor - * @param nprocs [in] number of processors - */ -void index_before2DBCD(const int& ixx_local, - const int& iproc, - int& ixx_global, - const int& nxx_block_local, - const int& nprocs) -{ - // calculate the index of block the element belongs to. - // note that it is just the index within the domain of processor - int iblock_local = ixx_local / nxx_block_local; - // calculate the index within a block - int ixx_block_local = ixx_local % nxx_block_local; - - // then there seems to be a (iblock, iproc) 2-dimensional coordinate - // and flattened to a one-dimensional index - // means the 2d mapping would be: - // proc1, proc2, proc3, proc4, ... - // block1 0 1 2 3 ... - // block2 nprocs+0 nprocs+1 nprocs+2 nprocs+3 ... - // block3 2*nprocs+0 2*nprocs+1 2*nprocs+2 2*nprocs+3 ... - // block4 - // ... - // it means each block distributes its data to all processors. - // (iblock, jproc) -> ij_blockproc - - // it is convenient to assume each processor has same number of blocks - // but in general it is not necessary - // however if our assumption is true, the `temp` would be GLOBAL block index - int temp = iblock_local * nprocs + iproc; // block is the leading dimension - - // ixx_local % nxx_block is the local index with in a block - // temp * nxx_block is then the shift, then there seems to be a two-dimensional - // coordinate like (temp, ixx_block_local). - // xx1_inblock, xx2_inblock, xx3_inblock, ... - // block1 of proc1 - // block1 of proc2 - // block1 of proc3 RETURN <- temp - // ... - // block2 of proc1 - // block2 of proc2 - // ... - // block3 of proc1 - // ... - - // In summary it is a mapping from (ixx_local, iproc) to a one-dimensional index - // named ixx_global - ixx_global = temp * nxx_block_local + ixx_block_local; -} - -/** - * @brief Reverse function of index_before2DBCD, convert global 1d flattened index to local index (ixx_local, iproc) pair - * - * @param ixx_local [out] local index of something on one processor but across all blocks - * @param iproc [out] processor id - * @param ixx_global [in] global 1d flattened index - * @param nxx_block_local [in] number of something per block, or say capacity of something per block - * @param nprocs [in] number of processors - */ -void index_after2DBCD(int& ixx_local, - int& iproc, - const int& ixx_global, - const int& nxx_block_local, - const int& nprocs) -{ - // calculate the total number of elements in blocks with the same index across all processors - int nxx_block = nxx_block_local * nprocs; - // therefore, the index of block is (the index of block on each processor) - int iblock = ixx_global / nxx_block; - // and index within the concatenation of all blocks is - int ixx_block = ixx_global % nxx_block; - // then the processor index is - iproc = ixx_block / nxx_block_local; - - // so on each processor, there are nxx_block_local elements in each "local block", - // the index iblock also indicates there are "iblock" of "local blocks" ahead of - // the element of interest, therefore, the total number of elements ahead will be - // the shift on index - int shift = iblock * nxx_block_local; - // calculate the index within a "local block" - int ixx_block_local = ixx_global % nxx_block_local; - // add together - ixx_local = shift + ixx_block_local; -} - -/** - * @brief distribute manually the data from a 2D array to a 1D array, the data is distributed according to the 2D Block-Cyclic-Distribution (2D-BCD) strategy - * - * @param iproc present processor index - * @param nxx_block_local number of element in a block per processor - * @param dst destination where data copied to - * @param nrows_dst (ScaLAPACK: MB), number of rows of block per processor - * @param ncols_dst (ScaLAPACK: NB), number of columns of block per processor - * @param src source storing data - * @param iproc_row (ScaLAPACK: MYPROW), index of processor for row in processor grid - * @param iproc_col (ScaLAPACK: MYPCOL), index of processor for column in processor grid - * @param nprocs_row (ScaLAPACK: NPROW), number of processors for row in processor grid - * @param nprocs_col (ScaLAPACK: NPCOL), number of processors for column in processor grid - * @param nrows_src maximal number of rows of source - * @param ncols_src maximal number of columns of source - * - * @attention FORTRAN and C++ style data layout is different, the data layout in C++ is row-major, but in FORTRAN it is column-major. - * - * @note find more information on BLACS quick reference: https://netlib.org/blacs/BLACS/QRef.html - */ -void distribute(const int& iproc, //< [in] (myid) - const int& nxx_block_local, //< [in] (nb) - double* dst, //< [out] (work) - const int& nrows_dst, //< [in] (naroc[0]) - const int& ncols_dst, //< [in] (naroc[1]) - double** src, //< [in] (CTOT) - const int& iproc_row, //< [in] (iprow) - const int& iproc_col, //< [in] (ipcol) - const int& nprocs_row, //< [in] (dim0) - const int& nprocs_col, //< [in] (dim1) - const int& nrows_src = -1, //< [in] (nbands) - const int& ncols_src = -1) //< [in] new parameter -{ - // FIRST DEFINE THE PROCESSOR INFORMATION - // workflow consistency judgement: only processor 0 can output the data - // but all processors have the same workflow - if(iproc != 0) return; - for(int i = 0; i < nrows_dst; i++) // loop over the leading dimension - { - int irow_global; - // reversely find what is the element correspond to the element (i, j) on - // processor (iproc_row, iproc_col) - // so calculate the global index, or say index of the large matrix to - // distribute - index_before2DBCD(i, // local index - iproc_row, // processor index for row - irow_global, // global row index - nxx_block_local, // number of elements in a block - nprocs_row); // number of processors for row - // check if the global index is out of range - // WHY PRETEND LIKE NOTHING HAPPENED? - if(irow_global >= nrows_src) continue; - // if valid... - for(int j = 0; j < ncols_dst; j++) // loop over the second dimension - { - int icol_global; - index_before2DBCD(j, iproc_col, icol_global, nxx_block_local, nprocs_col); - // check if the global index is out of range - // WHY PRETEND LIKE NOTHING HAPPENED? - if(icol_global >= ncols_src) continue; - // if not, access the memory. - // HERE IS THE PLACE CONTROLLING THE DIFFERENT DATA LAYOUT BETWEEN C++ AND FORTRAN - // C++ style: row-major, memory of elements in a row is continuous - // int flatten_index = i * ncols_dst + j; - // FORTRAN style: column-major, memory of elements in a column is continuous - int flatten_index = j * nrows_dst + i; - dst[flatten_index] = src[irow_global][icol_global]; - } - } -} -/** - * @brief overloaded version of `distribute` for complex data - * - * @param iproc present processor index - * @param nxx_block_local number of element in a block per processor - * @param dst destination where data copied to - * @param nrows_dst (ScaLAPACK: MB), number of rows of block per processor - * @param ncols_dst (ScaLAPACK: NB), number of columns of block per processor - * @param src source storing data - * @param iproc_row (ScaLAPACK: MYPROW), index of processor for row in processor grid - * @param iproc_col (ScaLAPACK: MYPCOL), index of processor for column in processor grid - * @param nprocs_row (ScaLAPACK: NPROW), number of processors for row in processor grid - * @param nprocs_col (ScaLAPACK: NPCOL), number of processors for column in processor grid - * @param nrows_src maximal number of rows of source - * @param ncols_src maximal number of columns of source - * - * @attention FORTRAN and C++ style data layout is different, the data layout in C++ is row-major, but in FORTRAN it is column-major. - */ -void distribute(const int& iproc, //< [in] (myid) - const int& nxx_block_local, //< [in] (nb) - std::complex* dst, //< [out] (work) - const int& nrows_dst, //< [in] (naroc[0]) - const int& ncols_dst, //< [in] (naroc[1]) - std::complex** src, //< [in] (CTOT) - const int& iproc_row, //< [in] (iprow) - const int& iproc_col, //< [in] (ipcol) - const int& nprocs_row, //< [in] (dim0) - const int& nprocs_col, //< [in] (dim1) - const int& nrows_src = -1, //< [in] (nbands) - const int& ncols_src = -1) //< [in] new parameter -{ - if(iproc != 0) return; - for(int i = 0; i < nrows_dst; i++) - { - int irow_global; - index_before2DBCD(i, iproc_row, irow_global, nxx_block_local, nprocs_row); - if(irow_global >= nrows_src) continue; - for(int j = 0; j < ncols_dst; j++) - { - int icol_global; - index_before2DBCD(j, iproc_col, icol_global, nxx_block_local, nprocs_col); - if(icol_global >= ncols_src) continue; - int flatten_index = j * nrows_dst + i; - dst[flatten_index] = src[irow_global][icol_global]; - } - } -} - -/** - * @brief Reverse function of distribute, collect the data from a 1D array to a 2D array, the data is collected according to the 2D Block-Cyclic-Distribution (2D-BCD) strategy - * - * @param nxx_block_local number of element in a block per processor - * @param src - * @param nrows_src - * @param ncols_src - * @param dst - * @param iproc_row - * @param iproc_col - * @param nprocs_row - * @param nprocs_col - * @param iorbs - */ -void collect(const int& nxx_block_local, - double* src, - const int& nrows_src, - const int& ncols_src, - double** dst, - const int& iproc_row, - const int& iproc_col, - const int& nprocs_row, - const int& nprocs_col, - const int* iorbs = nullptr) -{ - if(dst == nullptr) return; - for(int i = 0; i < nrows_src; i++) // do you remember the difference between the FORTRAN and C++ matrix layouts? - { - int irow_global; - index_before2DBCD(i, iproc_row, irow_global, nxx_block_local, nprocs_row); - for(int j = 0; j < ncols_src; j++) - { - int icol_global; - index_before2DBCD(j, iproc_col, icol_global, nxx_block_local, nprocs_col); - const int icol = iorbs == nullptr ? icol_global : iorbs[icol_global]; - const int flatten_index = j * nrows_src + i; - dst[irow_global][icol] = src[flatten_index]; - } - } -} - -void map_onto_wfc(const int& nxx_block_local, - const std::vector& orb_indices, - std::complex* src, - const int& ndim1_src, - const int& ndim2_src, - std::complex** wfc, - const int& iproc_dim1, - const int& iproc_dim2, - const int& nprocs_dim1 = -1, - const int& nprocs_dim2 = -1) -{ - if(wfc == nullptr) return; - for(int i = 0; i < ndim1_src; i++) - { - int ixx_global; - index_before2DBCD(i, iproc_dim1, ixx_global, nxx_block_local, nprocs_dim1); - for(int j = 0; j < ndim2_src; j++) - { - int jxx_global; - index_before2DBCD(j, iproc_dim2, jxx_global, nxx_block_local, nprocs_dim2); - int iorb = orb_indices[jxx_global]; - #ifdef __DEBUG - assert (iorb >= 0); - assert (jxx_global >= 0); - #endif - wfc[ixx_global][iorb] = src[i * ndim2_src + j]; - } - } -} - -} // end of namespace LOWF diff --git a/source/module_psi/psi_io.h b/source/module_psi/psi_io.h new file mode 100644 index 0000000000..ccc7a86dd8 --- /dev/null +++ b/source/module_psi/psi_io.h @@ -0,0 +1,258 @@ +/** + * @file psi_io.cpp + * @author kirk0830 + * @brief This file collects both I/O of wavefunction for planewave and numerical atomic orbitals basis sets + * @version 0.1 + * @date 2024-05-30 + * + * @copyright Copyright (c) 2024 + * + */ +#include +#include +#include "module_base/formatter.h" +#include +#include +#include +#include +#include +#include "module_base/tool_quit.h" + +namespace psi +{ +namespace io +{ +template +void cast_to_stdcomplex(const std::string& src, std::vector& dest, const std::string& delimiter = " ") +{ + // there may be parentheses in the string + // so we need to remove them first + std::string str = src; + str.erase(std::remove(str.begin(), str.end(), '('), str.end()); + str.erase(std::remove(str.begin(), str.end(), ')'), str.end()); + const std::vector tokens= FmtCore::split(str, delimiter); + for (const auto& token : tokens) + { + dest.push_back(std::stod(token)); + } +} + +// read file named as LOWF_K_*.txt, the name should not be hard-coded +// structure of LOWF_K_*.txt: +// [ik] (index of k points) +// [xk] [yk] [zk] +// [nb] (number of bands) +// [nlocal] (number of orbitals) +// [ib] (band) +// [energy] (energy of the band) +// [occ] (Occupations) +// [real] [imag] [real] [imag] ... (wavefunction coefficients) +// ... +// [ib] (band) +// [energy] (energy of the band) +// [occ] (Occupations) +// [real] [imag] [real] [imag] ... (wavefunction coefficients) +// ... +/** + * @brief read ABACUS output wavefunction file named as LOWF_K_*.txt for std::complex wavefunction coefficients + * + * @param flowf file name under convention of LOWF_K_*.txt + * @param ik index of k points, will be returned + * @param kvec_c k vector in Cartesian coordinates, will be returned + * @param nbands number of bands, will be returned + * @param nlocal number of orbitals, will be returned + * @param energies energies of bands, will be returned + * @param occs occupations, will be returned + * @param lowf wavefunction coefficients, will be returned. Note! 1D array of complex numbers + */ +void read_abacus_lowf(const std::string& flowf, //<[in] LOWF_K_*.txt + int& ik, //<[out] index of k points + std::vector& kvec_c, //<[out] k vector + int& nbands, //<[out] number of bands + int& nlocal, //<[out] number of orbitals + std::vector& energies, //<[out] energies of bands + std::vector& occs, //<[out] occupations + std::vector>& lowf) //<[out] wavefunction coefficients +{ + std::ifstream ifs(flowf.c_str()); + if(!ifs) ModuleBase::WARNING_QUIT("read_abacus_lowf", "open file failed: " + flowf); + // will use line-by-line parse + std::string line; + bool read_kvec = false; + int iband = 0; + int ilocal = 0; + while (std::getline(ifs, line)) + { + // remove leading and trailing whitespaces + line = std::regex_replace(line, std::regex("^ +| +$|( ) +"), "$1"); + if(FmtCore::endswith(line, "(index of k points)")) + { + std::vector result = FmtCore::split(line); + ik = std::stoi(result[0]); + read_kvec = true; + continue; + } + if(read_kvec) + { + const std::vector result = FmtCore::split(line); + for (const auto& token : result) + { + kvec_c.push_back(std::stod(token)); + } + read_kvec = false; + continue; + } + if(FmtCore::endswith(line, "(number of bands)")) + { + std::vector result = FmtCore::split(line); + nbands = std::stoi(result[0]); + } + else if(FmtCore::endswith(line, "(number of orbitals)")) + { + std::vector result = FmtCore::split(line); + nlocal = std::stoi(result[0]); + } + else if(FmtCore::endswith(line, "(band)")) + { + std::vector result = FmtCore::split(line); + #ifdef __DEBUG + assert (ilocal == 0)||(ilocal == nlocal); + #endif + iband = std::stoi(result[0]); + ilocal = 0; // reset ilocal + } + else if(FmtCore::endswith(line, "(Ry)")) + { + std::vector result = FmtCore::split(line); + energies.push_back(std::stod(result[0])); + } + else if(FmtCore::endswith(line, "(Occupations)")) + { + std::vector result = FmtCore::split(line); + occs.push_back(std::stod(result[0])); + #ifdef __DEBUG + assert (ilocal == 0); + #endif + } + else// read wavefunction coefficients + { + const std::vector result = FmtCore::split(line); + for (const auto& token : result) + { + std::vector temp; + cast_to_stdcomplex(token, temp); + for (int i = 0; i < temp.size(); i += 2) + { + lowf.push_back(std::complex(temp[i], temp[i + 1])); + } + ilocal += temp.size() / 2; + } + } + } + #ifdef __DEBUG + assert (lowf.size() == nbands * nlocal); + assert (iband == nbands); + assert (ilocal == nlocal); + #endif +} +// overload for the case of double wavefunction coefficients +/** + * @brief double overloaded version of read_abacus_lowf, read wavefunction coefficients as double + * + * @param flowf file name under convention of LOWF_K_*.txt + * @param ik index of k points, will be returned + * @param kvec_c k vector in Cartesian coordinates, will be returned + * @param nbands number of bands, will be returned + * @param nlocal number of orbitals, will be returned + * @param energies energies of bands, will be returned + * @param occs occupations, will be returned + * @param lowf wavefunction coefficients, will be returned. Note! 1D array of complex numbers + */ +void read_abacus_lowf(const std::string& flowf, //<[in] LOWF_K_*.txt + int& ik, //<[out] index of k points + std::vector& kvec_c, //<[out] k vector + int& nbands, //<[out] number of bands + int& nlocal, //<[out] number of orbitals + std::vector& energies, //<[out] energies of bands + std::vector& occs, //<[out] occupations + std::vector& lowf) //<[out] wavefunction coefficients +{ + std::ifstream ifs(flowf.c_str()); + if(!ifs) ModuleBase::WARNING_QUIT("read_abacus_lowf", "open file failed: " + flowf); + // will use line-by-line parse + std::string line; + bool read_kvec = false; + int iband = 0; + int ilocal = 0; + while (std::getline(ifs, line)) + { + // remove leading and trailing whitespaces + line = std::regex_replace(line, std::regex("^ +| +$|( ) +"), "$1"); + if(FmtCore::endswith(line, "(index of k points)")) + { + std::vector result = FmtCore::split(line); + ik = std::stoi(result[0]); + read_kvec = true; + continue; + } + if(read_kvec) + { + const std::vector result = FmtCore::split(line); + for (const auto& token : result) + { + kvec_c.push_back(std::stod(token)); + } + read_kvec = false; + continue; + } + if(FmtCore::endswith(line, "(number of bands)")) + { + std::vector result = FmtCore::split(line); + nbands = std::stoi(result[0]); + } + else if(FmtCore::endswith(line, "(number of orbitals)")) + { + std::vector result = FmtCore::split(line); + nlocal = std::stoi(result[0]); + } + else if(FmtCore::endswith(line, "(band)")) + { + std::vector result = FmtCore::split(line); + #ifdef __DEBUG + assert (ilocal == 0)||(ilocal == nlocal); + #endif + iband = std::stoi(result[0]); + ilocal = 0; // reset ilocal + } + else if(FmtCore::endswith(line, "(Ry)")) + { + std::vector result = FmtCore::split(line); + energies.push_back(std::stod(result[0])); + } + else if(FmtCore::endswith(line, "(Occupations)")) + { + std::vector result = FmtCore::split(line); + occs.push_back(std::stod(result[0])); + #ifdef __DEBUG + assert (ilocal == 0); + #endif + } + else// read wavefunction coefficients + { + const std::vector result = FmtCore::split(line); + for (const auto& token : result) + { + lowf.push_back(std::stod(token)); + ilocal += 1; + } + } + } + #ifdef __DEBUG + assert (lowf.size() == nbands * nlocal); + assert (iband == nbands); + assert (ilocal == nlocal); + #endif +} + +} // namespace io +} // namespace psi \ No newline at end of file