Skip to content

Commit

Permalink
replace cal_stress in GPU relax calculation,use NUM_STREAM in INPUT…
Browse files Browse the repository at this point in the history
… file and fix memory leak in force calculation (#4200)

* replace ParaV in module gint

* change PV to pv in module gint

* change GlobalC in module gint

* fix LCAO_Orbitals in module gint

* fix error in compile without abacus

* fix error in init_gpu_gint_variables

* remove GlobalC in grid_technique and grid_bigcell

* remove GlobalC in gint_tools and vbatch matrix

* fix relax computing stress unnecessarily and accelerate the GPU force computation

* fix num stream in input.md and use num_stream in input

* fix error in compute force

* fix memory error in force compute

* use std instead of double * and add const

* fix error in vector use

* fix error in compile

* fix error in compile with force

* fix compile error

* fix parameter name and function name

* add time ticker and fix nspin transport

* delete printf in files

* fix test bug and fix grid_size

* init nstreams

---------

Co-authored-by: Mohan Chen <[email protected]>
  • Loading branch information
A-006 and mohanchen committed May 27, 2024
1 parent 79b9111 commit 992ad9b
Show file tree
Hide file tree
Showing 9 changed files with 120 additions and 111 deletions.
3 changes: 2 additions & 1 deletion docs/advanced/input_files/input-main.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
- [search\_radius](#search_radius)
- [search\_pbc](#search_pbc)
- [bx, by, bz](#bx-by-bz)
- [num\_stream] (#num_stream)
- [num\_stream](#num_stream)
- [Electronic structure](#electronic-structure)
- [basis\_type](#basis_type)
- [ks\_solver](#ks_solver)
Expand Down Expand Up @@ -915,6 +915,7 @@ These variables are used to control the numerical atomic orbitals related parame
- **Description**: The number of GPU streams used when computing `LCAO`. The effect may differ between devices. For most devices, a value
greater than 2 is sufficient.
- **Default** : "4"

[back to top](#full-list-of-input-keywords)

## Electronic structure
Expand Down
3 changes: 2 additions & 1 deletion source/module_esolver/esolver_ks_lcao_elec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ void ESolver_KS_LCAO<TK, TR>::set_matrix_grid(Record_adj& ra)
this->pw_rho->nplane,
this->pw_rho->startz_current,
GlobalC::ucell,
GlobalC::ORB);
GlobalC::ORB,
GlobalV::NUM_STREAM);

// (2)For each atom, calculate the adjacent atoms in different cells
// and allocate the space for H(R) and S(R).
Expand Down
70 changes: 35 additions & 35 deletions source/module_hamilt_lcao/module_gint/gint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ Gint::~Gint()
void Gint::cal_gint(Gint_inout* inout)
{
ModuleBase::timer::tick("Gint_interface", "cal_gint");

if(inout->job==Gint_Tools::job_type::vlocal)
{
ModuleBase::TITLE("Gint_interface","cal_gint_vlocal");
Expand Down Expand Up @@ -132,45 +131,47 @@ void Gint::cal_gint(Gint_inout* inout)
{
const int ncyz = this->ny * this->nplane;
int nat = ucell.nat;
// for (int is = 0; is < GlobalV::NSPIN; ++is)
// {
double *force = new double[ucell.nat * 3];
for (int i = 0; i < nat * 3; i++)
{
force[i] = 0.0;
}
double *stress = new double[6];
for (int i = 0; i < 6; i++)
{
stress[i] = 0.0;
}
GintKernel::gint_gamma_force_gpu(this->DMRGint[inout->ispin],
ucell.omega
/ this->ncxyz,
inout->vl,
force,
stress,
this->nplane,
dr,
rcut,
*this->gridt,
ucell);
const int isforce = inout->isforce;
const int isstress =inout->isstress;
ModuleBase::TITLE("Gint_interface","cal_force_gpu");
ModuleBase::timer::tick("Gint_interface","cal_force_gpu");
if (isforce || isstress){
std::vector<double> force(nat * 3, 0.0);
std::vector<double> stress(6, 0.0);
GintKernel::gint_fvl_gamma_gpu(this->DMRGint[inout->ispin],
ucell.omega
/ this->ncxyz,
inout->vl,
force,
stress,
this->nplane,
dr,
rcut,
isforce,
isstress,
*this->gridt,
ucell);
if (inout->isforce)
{
for (int iat = 0; iat < nat; iat++)
{
inout->fvl_dphi[0](iat, 0) += force[iat * 3];
inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1];
inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2];
}
inout->svl_dphi[0](0, 0) += stress[0];
inout->svl_dphi[0](0, 1) += stress[1];
inout->svl_dphi[0](0, 2) += stress[2];
inout->svl_dphi[0](1, 1) += stress[3];
inout->svl_dphi[0](1, 2) += stress[4];
inout->svl_dphi[0](2, 2) += stress[5];

delete[] force;
delete[] stress;
// }
}
if (inout->isstress){
inout->svl_dphi[0](0, 0) += stress[0];
inout->svl_dphi[0](0, 1) += stress[1];
inout->svl_dphi[0](0, 2) += stress[2];
inout->svl_dphi[0](1, 1) += stress[3];
inout->svl_dphi[0](1, 2) += stress[4];
inout->svl_dphi[0](2, 2) += stress[5];
}
force.clear();
stress.clear();
}
ModuleBase::timer::tick("Gint_interface","cal_force_gpu");
}
}
else
Expand Down Expand Up @@ -310,7 +311,6 @@ void Gint::cal_gint(Gint_inout* inout)
this->nplane, this->gridt->start_ind[grid_index], ncyz, dv);

double** DM_in;

if(GlobalV::GAMMA_ONLY_LOCAL)
{
DM_in = inout->DM[GlobalV::CURRENT_SPIN];
Expand Down
50 changes: 26 additions & 24 deletions source/module_hamilt_lcao/module_gint/gint_force.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ typedef struct
double** matrix_A_device;
double** matrix_B_device;
double** matrix_C_device;
} SGridParameter;
} grid_para;

typedef struct
{
Expand All @@ -55,14 +55,14 @@ typedef struct
int* iat_device;
int* iat_host;

} ForceStressIat;
} frc_strs_iat;

typedef struct
{
double* stress_global;
double* force_global;
int* iat_global;
} ForceStressIatGlobal;
} frc_strs_iat_gbl;

typedef struct
{
Expand All @@ -84,14 +84,16 @@ typedef struct
* @param ylmcoef_now Coefficients for spherical harmonics.
* @param gridt Reference to Grid_Technique object.
*/
void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
void gint_fvl_gamma_gpu(hamilt::HContainer<double>* dm,
const double vfactor,
const double* vlocal,
double* force,
double* stress,
std::vector<double>& force,
std::vector<double>& stress,
const int nczp,
double dr,
double* rcut,
const int isforce,
const int isstress,
const Grid_Technique& gridt,
const UnitCell& ucell);

Expand Down Expand Up @@ -136,15 +138,15 @@ void gpu_task_generator_force(const Grid_Technique& gridt,
int& max_m,
int& max_n,
int& atom_pair_num,
SGridParameter& para);
grid_para& para);
/**
* @brief Density Matrix,force Stress Iat Init
*
* Using structure to init the parameter
*
* @param denstiy_mat DensityMat,contained the density_mat_dice and
* destiyMatHost
* @param f_s_iat_dev ForceStressIatGlobal,contains the Force Stress and
* @param f_s_iat_dev frc_strs_iat_gbl,contains the Force Stress and
* Iat Number
* @param dm hamilt::HContainer,denstiy stored in the Hcontainer
* @param gridt Grid_Technique,stored the major method in the the gint.
Expand All @@ -154,7 +156,7 @@ void gpu_task_generator_force(const Grid_Technique& gridt,
* @param atom_num_grid in force calculate,used for Block nums
*/
void calculateInit(DensityMat& denstiy_mat,
ForceStressIatGlobal& f_s_iat_dev,
frc_strs_iat_gbl& f_s_iat_dev,
hamilt::HContainer<double>* dm,
const Grid_Technique& gridt,
const UnitCell& ucell,
Expand Down Expand Up @@ -187,28 +189,28 @@ void allocateDm(double* matrix_host,
* @param nbz int,stand for the number of Z-axis
* @param gridt Grid_Technique,stored the major method in the the gint.
*/
void para_init(SGridParameter& para,
void para_init(grid_para& para,
const int iter_num,
const int nbz,
const Grid_Technique& gridt);
/**
* @brief ForceStressIat on host and device Init
* @brief frc_strs_iat on host and device Init
*
* GridParameter init
*
* @param ForceStressIat ForceStressIat,contains the Force Stress Iat on Host
* @param frc_strs_iat frc_strs_iat,contains the Force Stress Iat on Host
* @param stream_num int , record the stream in GPU
* @param cuda_block in stress compute,used for Block nums
* @param atom_num_grid in force calculate,used for Block nums
* @param max_size Maximum size of atoms on a grid.
* @param ForceStressIatGlobal ForceStressIatGlobal,contains the Force Stress Iat on Host
* @param frc_strs_iat_gbl frc_strs_iat_gbl,contains the Force Stress Iat on Host
*/
void cal_init(ForceStressIat& f_s_iat,
void cal_init(frc_strs_iat& f_s_iat,
const int stream_num,
const int cuda_block,
const int atom_num_grid,
const int max_size,
const ForceStressIatGlobal& f_s_iatg);
frc_strs_iat_gbl& f_s_iatg);
/**
* @brief GridParameter memCpy,from Host to Device
*
Expand All @@ -219,46 +221,46 @@ void cal_init(ForceStressIat& f_s_iat,
* @param nbz int,stand for the number of Z-axis
* @param atom_num_grid in force calculate,used for Block nums
*/
void para_mem_copy(SGridParameter& para,
void para_mem_copy(grid_para& para,
const Grid_Technique& gridt,
const int nbz,
const int atom_num_grid);
/**
* @brief Force Stress Force Iat memCpy,from Host to Device
*
* @param ForceStressIat ForceStressIat,contains the Force Stress Iat on Device
* @param frc_strs_iat frc_strs_iat,contains the Force Stress Iat on Device
* and Host
* @param gridt Grid_Technique,stored the major method in the the gint.
* @param atom_num_grid in force calculate,used for Block nums
* @param cuda_block in stress compute,used for Block nums
* @param stream_num int , record the stream in GPU
*/
void cal_mem_cpy(ForceStressIat& f_s_iat,
void cal_mem_cpy(frc_strs_iat& f_s_iat,
const Grid_Technique& gridt,
const int atom_num_grid,
const int cuda_block,
const int stream_num);
/**
* @brief Force Calculate on Host
*
* @param ForceStressIat ForceStressIat,contains the Force Stress Iat on Device
* @param frc_strs_iat frc_strs_iat,contains the Force Stress Iat on Device
* and Host
* @param force stored the force for each atom on each directions
* @param atom_num_grid in force calculate,used for Block nums
*/
void cal_force_add(ForceStressIat& f_s_iat,
double* force,
void cal_force_add(frc_strs_iat& f_s_iat,
std::vector<double>& force,
const int atom_num_grid);
/**
* @brief Stress Calculate on Host
*
* @param ForceStressIat ForceStressIat,contains the Force Stress Iat on Device
* @param frc_strs_iat frc_strs_iat,contains the Force Stress Iat on Device
* and Host
* @param stress stored the stress for each directions
* @param cuda_block in stress compute,used for Block nums
*/
void cal_stress_add(ForceStressIat& f_s_iat,
double* stress,
void cal_stress_add(frc_strs_iat& f_s_iat,
std::vector<double>& stress,
const int cuda_block);
} // namespace GintKernel
#endif
Loading

0 comments on commit 992ad9b

Please sign in to comment.