Skip to content

Commit

Permalink
benchmark needed now
Browse files Browse the repository at this point in the history
  • Loading branch information
kirk0830 committed Sep 20, 2024
1 parent e49797d commit 572b9c0
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 53 deletions.
2 changes: 1 addition & 1 deletion source/module_io/test/read_input_ptest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ TEST_F(InputParaTest, ParaRead)
EXPECT_EQ(param.inp.nbands_istate, 5);
EXPECT_EQ(param.inp.bands_to_print.size(), 0);
EXPECT_FALSE(param.inp.if_separate_k);
EXPECT_EQ(param.inp.pw_seed, 1);
EXPECT_EQ(param.inp.pw_seed, 0);
EXPECT_EQ(param.inp.emin_sto, 0.0);
EXPECT_EQ(param.inp.emax_sto, 0.0);
EXPECT_EQ(param.inp.nche_sto, 100);
Expand Down
2 changes: 1 addition & 1 deletion source/module_parameter/input_parameter.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ struct Input_para
bool diago_full_acc = false; ///< all the empty states are diagonalized
std::string init_wfc = "atomic"; ///< "file","atomic","random"
bool psi_initializer = false; ///< whether use psi_initializer to initialize wavefunctions
int pw_seed = 1; ///< random seed for initializing wave functions qianrui 2021-8-12
int pw_seed = 0; ///< random seed for initializing wave functions qianrui 2021-8-12
std::string init_chg = "atomic"; ///< "file","atomic"
bool dm_to_rho = false; ///< read density matrix from npz format and calculate charge density
std::string chg_extrap = "default"; ///< xiaohui modify 2015-02-01
Expand Down
106 changes: 57 additions & 49 deletions source/module_psi/psi_initializer_nao.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,26 +124,27 @@ template <typename T, typename Device>
void psi_initializer_nao<T, Device>::allocate_table()
{
// find correct dimension for ovlp_flzjlq
int dim1 = this->p_ucell_->ntype;
int dim2 = 0; // dim2 should be the maximum number of zeta for each atomtype
int ntype = this->p_ucell_->ntype;
int lmaxmax = 0; // lmaxmax
int nzeta_max = 0; // dim3 should be the maximum number of zeta for each atomtype
for (int it = 0; it < this->p_ucell_->ntype; it++)
{
int nzeta = 0;
for (int l = 0; l < this->p_ucell_->atoms[it].nwl + 1; l++)
int lmax = this->p_ucell_->atoms[it].nwl;
lmaxmax = (lmaxmax > lmax) ? lmaxmax : lmax;
for (int l = 0; l < lmax + 1; l++)
{
nzeta += this->p_ucell_->atoms[it].l_nchi[l];
}
dim2 = (nzeta > dim2) ? nzeta : dim2;
nzeta_max = (nzeta > nzeta_max) ? nzeta : nzeta_max;
}
if (dim2 == 0)
if (nzeta_max == 0)
{
ModuleBase::WARNING_QUIT("psi_initializer_nao<T, Device>::psi_initializer_nao",
"there is not ANY numerical atomic orbital read in present system, quit.");
}
int dim3 = GlobalV::NQX;
// allocate memory for ovlp_flzjlq
this->ovlp_flzjlq_.create(dim1, dim2, dim3);
this->ovlp_flzjlq_.zero_out();
// allocate a map (it, l, izeta) -> i, should allocate memory of ntype * lmax * nzeta_max
this->projmap_.create(ntype, lmaxmax + 1, nzeta_max);
}

#ifdef __MPI
Expand All @@ -167,8 +168,6 @@ void psi_initializer_nao<T, Device>::initialize(Structure_Factor* sf,
// allocate
this->allocate_table();
this->read_external_orbs(this->p_ucell_->orbital_fn, rank);
// this->cal_ovlp_flzjlq(); //because GlobalV::NQX will change during vcrelax, so it should be called in both init
// and init_after_vc
// then for generate random number to fill in the wavefunction
this->ixy2is_.clear();
this->ixy2is_.resize(this->pw_wfc_->fftnxy);
Expand All @@ -193,8 +192,6 @@ void psi_initializer_nao<T, Device>::initialize(Structure_Factor* sf,
// allocate
this->allocate_table();
this->read_external_orbs(this->p_ucell_->orbital_fn, 0);
// this->cal_ovlp_flzjlq(); //because GlobalV::NQX will change during vcrelax, so it should be called in both init
// and init_after_vc
// then for generate random number to fill in the wavefunction
this->ixy2is_.clear();
this->ixy2is_.resize(this->pw_wfc_->fftnxy);
Expand All @@ -207,31 +204,48 @@ template <typename T, typename Device>
void psi_initializer_nao<T, Device>::tabulate()
{
ModuleBase::timer::tick("psi_initializer_nao", "tabulate");
this->ovlp_flzjlq_.zero_out();

// a uniformed qgrid
std::vector<double> qgrid(GlobalV::NQX);
std::iota(qgrid.begin(), qgrid.end(), 0);
std::for_each(qgrid.begin(), qgrid.end(), [this](double& q) { q = q * GlobalV::DQ; });

// only when needed, allocate memory for cubspl_
if (this->cubspl_.get()) { this->cubspl_.reset(); }
this->cubspl_ = std::unique_ptr<ModuleBase::CubicSpline>(
new ModuleBase::CubicSpline(qgrid.size(), qgrid.data()));

// calculate the total number of radials and call reserve to allocate memory
int nchi = 0;
for (int it = 0; it < this->p_ucell_->ntype; it++)
{
for (int l = 0; l < this->p_ucell_->atoms[it].nwl + 1; l++)
{
nchi += this->p_ucell_->atoms[it].l_nchi[l];
}
}
this->cubspl_->reserve(nchi);
ModuleBase::SphericalBesselTransformer sbt_(true); // bool: enable cache

// tabulate the spherical bessel transform of numerical orbital function
std::vector<double> Jlfq(GlobalV::NQX, 0.0);
int i = 0;
for (int it = 0; it < this->p_ucell_->ntype; it++)
{
int ic = 0;
for (int l = 0; l < this->p_ucell_->atoms[it].nwl + 1; l++)
{
for (int izeta = 0; izeta < this->p_ucell_->atoms[it].l_nchi[l]; izeta++)
{
std::vector<double> ovlp_flzjlq_q(GlobalV::NQX);
std::vector<double> qgrid(GlobalV::NQX);
std::iota(qgrid.begin(), qgrid.end(), 0);
std::for_each(qgrid.begin(), qgrid.end(), [this](double& q) { q = q * GlobalV::DQ; });
this->sbt.direct(l,
this->nr_[it][ic],
this->rgrid_[it][ic].data(),
this->chi_[it][ic].data(),
GlobalV::NQX,
qgrid.data(),
ovlp_flzjlq_q.data());

#pragma omp parallel for schedule(static, 4096 / sizeof(double))
for (int iq = 0; iq < GlobalV::NQX; iq++)
{
this->ovlp_flzjlq_(it, ic, iq) = ovlp_flzjlq_q[iq];
}
sbt_.direct(l,
this->nr_[it][ic],
this->rgrid_[it][ic].data(),
this->chi_[it][ic].data(),
GlobalV::NQX,
qgrid.data(),
Jlfq.data());
this->cubspl_->add(Jlfq.data());
this->projmap_(it, l, izeta) = i++; // index it
++ic;
}
}
Expand All @@ -251,16 +265,18 @@ void psi_initializer_nao<T, Device>::proj_ao_onkG(const int ik)
ModuleBase::matrix ylm(total_lm, npw);

std::vector<std::complex<double>> aux(npw);
std::vector<ModuleBase::Vector3<double>> gk(npw);
std::vector<double> qnorm(npw);
std::vector<ModuleBase::Vector3<double>> q(npw);
#pragma omp parallel for schedule(static, 4096 / sizeof(double))
for (int ig = 0; ig < npw; ig++)
{
gk[ig] = this->pw_wfc_->getgpluskcar(ik, ig);
q[ig] = this->pw_wfc_->getgpluskcar(ik, ig);
qnorm[ig] = q[ig].norm() * this->p_ucell_->tpiba;
}

ModuleBase::YlmReal::Ylm_Real(total_lm, npw, gk.data(), ylm);
ModuleBase::YlmReal::Ylm_Real(total_lm, npw, q.data(), ylm);
// int index = 0;
std::vector<double> ovlp_flzjlg(npw);
std::vector<double> Jlfq(npw, 0.0);
int ibasis = 0;
for (int it = 0; it < this->p_ucell_->ntype; it++)
{
Expand All @@ -281,18 +297,10 @@ void psi_initializer_nao<T, Device>::proj_ao_onkG(const int ik)
transformation of numerical orbital function, is indiced by it and ic, is needed to
interpolate everytime when ic updates, therefore everytime when present orbital is done
*/
#pragma omp parallel for schedule(static, 4096 / sizeof(double))
for (int ig = 0; ig < npw; ig++)
{
ovlp_flzjlg[ig] = ModuleBase::PolyInt::Polynomial_Interpolation(
this->ovlp_flzjlq_, // the spherical bessel transform of numerical orbital function
it,
ic, // each (it, ic)-pair defines a unique numerical orbital function
GlobalV::NQX,
GlobalV::DQ, // grid number and grid spacing of q
gk[ig].norm() * this->p_ucell_->tpiba // norm of (G+k) = K
);
}

// use cublic spline instead of previous polynomial interpolation
this->cubspl_->eval(npw, qnorm.data(), Jlfq.data(), nullptr, nullptr, this->projmap_(it, L, N));

/* FOR EVERY NAO IN EACH ATOM */
if (GlobalV::NSPIN == 4)
{
Expand Down Expand Up @@ -321,7 +329,7 @@ void psi_initializer_nao<T, Device>::proj_ao_onkG(const int ik)
#pragma omp parallel for
for (int ig = 0; ig < npw; ig++)
{
aux[ig] = sk[ig] * ylm(lm, ig) * ovlp_flzjlg[ig];
aux[ig] = sk[ig] * ylm(lm, ig) * Jlfq[ig];
}

#pragma omp parallel for
Expand Down Expand Up @@ -361,7 +369,7 @@ void psi_initializer_nao<T, Device>::proj_ao_onkG(const int ik)
for (int ig = 0; ig < npw; ig++)
{
(*(this->psig_))(ibasis, ig)
= this->template cast_to_T<T>(lphase * sk[ig] * ylm(lm, ig) * ovlp_flzjlg[ig]);
= this->template cast_to_T<T>(lphase * sk[ig] * ylm(lm, ig) * Jlfq[ig]);
}
++ibasis;
}
Expand Down
8 changes: 6 additions & 2 deletions source/module_psi/psi_initializer_nao.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
#define PSI_INITIALIZER_NAO_H
#include "psi_initializer.h"
#include "module_base/realarray.h"

#include "module_base/cubic_spline.h"
#include <memory>
/*
Psi (planewave based wavefunction) initializer: numerical atomic orbital method
*/
Expand Down Expand Up @@ -54,7 +55,10 @@ class psi_initializer_nao : public psi_initializer<T, Device>

private:
std::vector<std::string> orbital_files_;
ModuleBase::realArray ovlp_flzjlq_;
/// @brief cubic spline for interpolation
std::unique_ptr<ModuleBase::CubicSpline> cubspl_;
/// @brief radial map, [itype][l][izeta] -> i
ModuleBase::realArray projmap_;
/// @brief number of realspace grids per type per chi, [itype][ichi]
std::vector<std::vector<int>> nr_;
/// @brief data of numerical atomic orbital per type per chi per position, [itype][ichi][ir]
Expand Down

0 comments on commit 572b9c0

Please sign in to comment.