From 992ad9b2600358b6bc0d6fd3e22d13b05592013a Mon Sep 17 00:00:00 2001
From: liiutao <74701833+A-006@users.noreply.github.com>
Date: Mon, 27 May 2024 15:13:50 +0800
Subject: [PATCH] =?UTF-8?q?replace=20`cal=5Fstress`=20in=20GPU=20relax=20c?=
 =?UTF-8?q?alculation=EF=BC=8Cuse=20NUM=5FSTREAM=20in=20INPUT=20file=20and?=
 =?UTF-8?q?=20fix=20memory=20leak=20in=20force=20calculation=20(#4200)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* replace ParaV in module gint

* change PV to pv in module gint

* change GlobalC in module gint

* fix LCAO_Orbitals in module gint

* fix error in compile without abacus

* fix error in init_gpu_gint_variables

* remove GlobalC in grid_technique and grid_bigcell

* remove GlobalC in gint_tools and vbatch matrix

* fix relax have compute stress and change GPU force compute to acclerate

* fix num stream in input.md and use num_stream in input

* fix error in compute force

* fix memory error in force compute

* use std instead of double * and add const

* fix error in vector use

* fix error in compile

* fix error in compile with force

* fix compile error

* fix paramter name and function name

* add time ticker and fix nspin transport

* delete printf in files

* fix test bug and fix grid_size

* init nstreams

---------

Co-authored-by: Mohan Chen <mohan.chen.chen.mohan@gmail.com>
---
 docs/advanced/input_files/input-main.md       |  3 +-
 .../module_esolver/esolver_ks_lcao_elec.cpp   |  3 +-
 .../module_hamilt_lcao/module_gint/gint.cpp   | 70 +++++++++----------
 .../module_gint/gint_force.h                  | 50 ++++++-------
 .../module_gint/gint_force_gpu.cu             | 52 +++++++-------
 .../module_gint/grid_technique.cpp            |  9 ++-
 .../module_gint/grid_technique.h              |  9 +--
 .../module_gint/gtask_force.cpp               |  2 +-
 .../module_gint/kernels/cuda/gint_force.cu    | 33 +++++----
 9 files changed, 120 insertions(+), 111 deletions(-)

diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md
index 9e87b52dcc..970dd4fd72 100644
--- a/docs/advanced/input_files/input-main.md
+++ b/docs/advanced/input_files/input-main.md
@@ -61,7 +61,7 @@
     - [search\_radius](#search_radius)
     - [search\_pbc](#search_pbc)
     - [bx, by, bz](#bx-by-bz)
-    - [num\_stream] (#num_stream)
+    - [num\_stream](#num_stream)
   - [Electronic structure](#electronic-structure)
     - [basis\_type](#basis_type)
     - [ks\_solver](#ks_solver)
@@ -915,6 +915,7 @@ These variables are used to control the numerical atomic orbitals related parame
 - **Description**: choose the number of streams in GPU when we compute the `LCAO`. According to different devices , we may have different effects.For most devices,the stream is
 enough when the number is bigger then 2.
 - **Default** : "4" 
+
 [back to top](#full-list-of-input-keywords)
 
 ## Electronic structure
diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp
index b3d8e975b5..263520e5f1 100644
--- a/source/module_esolver/esolver_ks_lcao_elec.cpp
+++ b/source/module_esolver/esolver_ks_lcao_elec.cpp
@@ -66,7 +66,8 @@ void ESolver_KS_LCAO<TK, TR>::set_matrix_grid(Record_adj& ra)
                              this->pw_rho->nplane,
                              this->pw_rho->startz_current,
                              GlobalC::ucell,
-                             GlobalC::ORB);
+                             GlobalC::ORB,
+                             GlobalV::NUM_STREAM);
 
     // (2)For each atom, calculate the adjacent atoms in different cells
     // and allocate the space for H(R) and S(R).
diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp
index 2b86e881dc..ae3205a187 100644
--- a/source/module_hamilt_lcao/module_gint/gint.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint.cpp
@@ -36,7 +36,6 @@ Gint::~Gint()
 void Gint::cal_gint(Gint_inout* inout)
 {
 	ModuleBase::timer::tick("Gint_interface", "cal_gint");
-
 	if(inout->job==Gint_Tools::job_type::vlocal)
 	{
 		ModuleBase::TITLE("Gint_interface","cal_gint_vlocal");
@@ -132,45 +131,47 @@ void Gint::cal_gint(Gint_inout* inout)
             {
                 const int ncyz = this->ny * this->nplane;
                 int nat = ucell.nat;
-                // for (int is = 0; is < GlobalV::NSPIN; ++is)
-                // {
-                    double *force = new double[ucell.nat * 3];
-                    for (int i = 0; i < nat * 3; i++)
-                    {
-                        force[i] = 0.0;
-                    }
-                    double *stress = new double[6];
-                    for (int i = 0; i < 6; i++)
-                    {
-                        stress[i] = 0.0;
-                    }
-                    GintKernel::gint_gamma_force_gpu(this->DMRGint[inout->ispin],
-                                                     ucell.omega
-                                                         / this->ncxyz,
-                                                     inout->vl,
-                                                     force,
-                                                     stress,
-                                                     this->nplane,
-                                                     dr,
-                                                     rcut,
-                                                     *this->gridt,
-                                                     ucell);
+                const int isforce = inout->isforce;
+                const int isstress =inout->isstress;
+                ModuleBase::TITLE("Gint_interface","cal_force_gpu");
+                ModuleBase::timer::tick("Gint_interface","cal_force_gpu");
+                if (isforce || isstress){
+                    std::vector<double> force(nat * 3, 0.0);
+                    std::vector<double> stress(6, 0.0);
+                    GintKernel::gint_fvl_gamma_gpu(this->DMRGint[inout->ispin],
+                                                    ucell.omega
+                                                        / this->ncxyz,
+                                                    inout->vl,
+                                                    force,
+                                                    stress,
+                                                    this->nplane,
+                                                    dr,
+                                                    rcut,
+                                                    isforce,
+                                                    isstress,
+                                                    *this->gridt,
+                                                    ucell);
+                if (inout->isforce)
+                {
                     for (int iat = 0; iat < nat; iat++)
                     {
                         inout->fvl_dphi[0](iat, 0) += force[iat * 3];
                         inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1];
                         inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2];
                     }
-                        inout->svl_dphi[0](0, 0) += stress[0];
-                        inout->svl_dphi[0](0, 1) += stress[1];
-                        inout->svl_dphi[0](0, 2) += stress[2];
-                        inout->svl_dphi[0](1, 1) += stress[3];
-                        inout->svl_dphi[0](1, 2) += stress[4];
-                        inout->svl_dphi[0](2, 2) += stress[5];
-                    
-                    delete[] force;
-                    delete[] stress;
-                // }
+                }
+                if (inout->isstress){
+                    inout->svl_dphi[0](0, 0) += stress[0];
+                    inout->svl_dphi[0](0, 1) += stress[1];
+                    inout->svl_dphi[0](0, 2) += stress[2];
+                    inout->svl_dphi[0](1, 1) += stress[3];
+                    inout->svl_dphi[0](1, 2) += stress[4];
+                    inout->svl_dphi[0](2, 2) += stress[5];
+                }
+                force.clear();
+                stress.clear();
+                }
+                ModuleBase::timer::tick("Gint_interface","cal_force_gpu");   
             }
         }
         else
@@ -310,7 +311,6 @@ void Gint::cal_gint(Gint_inout* inout)
                         this->nplane, this->gridt->start_ind[grid_index], ncyz, dv);
 
                     double** DM_in;
-
 					if(GlobalV::GAMMA_ONLY_LOCAL)
 					{
 						DM_in = inout->DM[GlobalV::CURRENT_SPIN];
diff --git a/source/module_hamilt_lcao/module_gint/gint_force.h b/source/module_hamilt_lcao/module_gint/gint_force.h
index 6b6718e142..51851082de 100644
--- a/source/module_hamilt_lcao/module_gint/gint_force.h
+++ b/source/module_hamilt_lcao/module_gint/gint_force.h
@@ -44,7 +44,7 @@ typedef struct
     double** matrix_A_device;
     double** matrix_B_device;
     double** matrix_C_device;
-} SGridParameter;
+} grid_para;
 
 typedef struct
 {
@@ -55,14 +55,14 @@ typedef struct
     int* iat_device;
     int* iat_host;
 
-} ForceStressIat;
+} frc_strs_iat;
 
 typedef struct
 {
     double* stress_global;
     double* force_global;
     int* iat_global;
-} ForceStressIatGlobal;
+} frc_strs_iat_gbl;
 
 typedef struct
 {
@@ -84,14 +84,16 @@ typedef struct
  * @param ylmcoef_now Coefficients for spherical harmonics.
  * @param gridt Reference to Grid_Technique object.
  */
-void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
+void gint_fvl_gamma_gpu(hamilt::HContainer<double>* dm,
                           const double vfactor,
                           const double* vlocal,
-                          double* force,
-                          double* stress,
+                          std::vector<double>& force,
+                          std::vector<double>& stress,
                           const int nczp,
                           double dr,
                           double* rcut,
+                          const int isforce,
+                          const int isstress,
                           const Grid_Technique& gridt,
                           const UnitCell& ucell);
 
@@ -136,7 +138,7 @@ void gpu_task_generator_force(const Grid_Technique& gridt,
                               int& max_m,
                               int& max_n,
                               int& atom_pair_num,
-                              SGridParameter& para);
+                              grid_para& para);
 /**
  * @brief Density Matrix,force Stress Iat Init
  *
@@ -144,7 +146,7 @@ void gpu_task_generator_force(const Grid_Technique& gridt,
  *
  * @param denstiy_mat DensityMat,contained the density_mat_dice and
  * destiyMatHost
- * @param f_s_iat_dev ForceStressIatGlobal,contined the Force Stress and
+ * @param f_s_iat_dev frc_strs_iat_gbl,contined the Force Stress and
  * Iat Number
  * @param dm hamilt::HContainer,denstiy stored in the Hcontainer
  * @param gridt Grid_Technique,stored the major method in the the gint.
@@ -154,7 +156,7 @@ void gpu_task_generator_force(const Grid_Technique& gridt,
  * @param atom_num_grid in force calculate,used for Block nums
  */
 void calculateInit(DensityMat& denstiy_mat,
-                   ForceStressIatGlobal& f_s_iat_dev,
+                   frc_strs_iat_gbl& f_s_iat_dev,
                    hamilt::HContainer<double>* dm,
                    const Grid_Technique& gridt,
                    const UnitCell& ucell,
@@ -187,28 +189,28 @@ void allocateDm(double* matrix_host,
  * @param nbz int,stand for the number of Z-axis
  * @param gridt Grid_Technique,stored the major method in the the gint.
  */
-void para_init(SGridParameter& para,
+void para_init(grid_para& para,
                        const int iter_num,
                        const int nbz,
                        const Grid_Technique& gridt);
 /**
- * @brief ForceStressIat on host and device Init
+ * @brief frc_strs_iat on host and device Init
  *
  * GridParameter init
  *
- * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Host
+ * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Host
  * @param stream_num int , record the stream in GPU
  * @param cuda_block in stress compute,used for Block nums
  * @param atom_num_grid in force calculate,used for Block nums
  * @param max_size Maximum size of atoms on a grid.
- * @param ForceStressIatGlobal ForceStressIatGlobal,contains the Force Stree Iat on Host
+ * @param frc_strs_iat_gbl frc_strs_iat_gbl,contains the Force Stree Iat on Host
  */
-void cal_init(ForceStressIat& f_s_iat,
+void cal_init(frc_strs_iat& f_s_iat,
                         const int stream_num,
                         const int cuda_block,
                         const int atom_num_grid,
                         const int max_size,
-                        const ForceStressIatGlobal& f_s_iatg);
+                        frc_strs_iat_gbl& f_s_iatg);
 /**
  * @brief GridParameter memCpy,from Host to Device
  *
@@ -219,21 +221,21 @@ void cal_init(ForceStressIat& f_s_iat,
  * @param nbz int,stand for the number of Z-axis
  * @param atom_num_grid in force calculate,used for Block nums
  */
-void para_mem_copy(SGridParameter& para,
+void para_mem_copy(grid_para& para,
                          const Grid_Technique& gridt,
                          const int nbz,
                          const int atom_num_grid);
 /**
  * @brief Force Stress Force Iat memCpy,from Host to Device
  *
- *  @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device
+ *  @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
  * and Host
  *  @param gridt Grid_Technique,stored the major method in the the gint.
  *  @param atom_num_grid in force calculate,used for Block nums
  *  @param cuda_block in stress compute,used for Block nums
  *  @param stream_num int , record the stream in GPU
  */
-void cal_mem_cpy(ForceStressIat& f_s_iat,
+void cal_mem_cpy(frc_strs_iat& f_s_iat,
                           const Grid_Technique& gridt,
                           const int atom_num_grid,
                           const int cuda_block,
@@ -241,24 +243,24 @@ void cal_mem_cpy(ForceStressIat& f_s_iat,
 /**
  * @brief Force Calculate on Host
  *
- * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device
+ * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
  * and Host
  * @param force stored the force for each atom on each directions
  * @param atom_num_grid in force calculate,used for Block nums
  */
-void cal_force_add(ForceStressIat& f_s_iat,
-                    double* force,
+void cal_force_add(frc_strs_iat& f_s_iat,
+                    std::vector<double>& force,
                     const int atom_num_grid);
 /**
  * @brief Stress Calculate on Host
  *
- * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device
+ * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
  * and Host
  * @param stress stored the stress for each directions
  * @param cuda_block in stress compute,used for Block nums
  */
-void cal_stress_add(ForceStressIat& f_s_iat,
-                     double* stress,
+void cal_stress_add(frc_strs_iat& f_s_iat,
+                     std::vector<double>& stress,
                      const int cuda_block);
 } // namespace GintKernel
 #endif
diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
index 30ea8fa6ab..3718f48819 100644
--- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
+++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
@@ -8,13 +8,12 @@
 #include "kernels/cuda/gint_force.cuh"
 #include "module_base/ylm.h"
 #include "module_hamilt_lcao/module_gint/gint_tools.h"
-
 namespace GintKernel
 {
 
 // Function to calculate forces using GPU-accelerated gamma point Gint
 /**
- * @brief Calculate forces and stresses for the `gint_gamma_force_gpu` function.
+ * @brief Calculate forces and stresses for the `gint_fvl_gamma_gpu` function.
  *
  * This function calculates forces and stresses based on given parameters.
  *
@@ -30,7 +29,7 @@ namespace GintKernel
  */
 /**
  * Function to calculate forces using GPU-accelerated gamma point Gint
- * @brief Calculate forces and stresses for the `gint_gamma_force_gpu` function.
+ * @brief Calculate forces and stresses for the `gint_fvl_gamma_gpu` function.
  *
  * This function calculates forces and stresses based on given parameters.
  *
@@ -55,14 +54,16 @@ namespace GintKernel
  * 6. force dot on the GPU.
  * 7. Copy the results back to the host.
  */
-void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
+void gint_fvl_gamma_gpu(hamilt::HContainer<double>* dm,
                           const double vfactor,
                           const double* vlocal,
-                          double* force,
-                          double* stress,
+                          std::vector<double>& force,
+                          std::vector<double>& stress,
                           const int nczp,
                           double dr,
                           double* rcut,
+                          const int isforce,
+                          const int isstress,
                           const Grid_Technique& gridt,
                           const UnitCell& ucell)
 {
@@ -77,9 +78,9 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
         = std::min(64, (gridt.psir_size + cuda_threads - 1) / cuda_threads);
     int iter_num = 0;
     DensityMat denstiy_mat;
-    ForceStressIatGlobal f_s_iat_dev;
-    SGridParameter para;
-    ForceStressIat f_s_iat;
+    frc_strs_iat_gbl f_s_iat_dev;
+    grid_para para;
+    frc_strs_iat f_s_iat;
 
     calculateInit(denstiy_mat,
                   f_s_iat_dev,
@@ -104,13 +105,13 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
             int max_m = 0;
             int max_n = 0;
             int atom_pair_num = 0;
-            dim3 grid_psi(nbz, 8);
+            dim3 grid_psi(nbz, 32);
             dim3 block_psi(64);
             dim3 grid_dot_force(cuda_block);
             dim3 block_dot_force(cuda_threads);
             dim3 grid_dot(cuda_block);
             dim3 block_dot(cuda_threads);
-
+            
             para_init(para, iter_num, nbz, gridt);
             cal_init(f_s_iat,
                                para.stream_num,
@@ -118,8 +119,6 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                                atom_num_grid,
                                max_size,
                                f_s_iat_dev);
-            checkCuda(cudaStreamSynchronize(gridt.streams[para.stream_num]));
-
             /*gpu task compute in CPU */
             gpu_task_generator_force(gridt,
                                      ucell,
@@ -150,6 +149,7 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                                  para.stream_num);
             checkCuda(cudaStreamSynchronize(gridt.streams[para.stream_num]));
             /* cuda stream compute and Multiplication of multinomial matrices */
+            
             get_psi_force<<<grid_psi,
                             block_psi,
                             0,
@@ -194,9 +194,8 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                                      atom_pair_num,
                                      gridt.streams[para.stream_num],
                                      nullptr);
-
-            checkCuda(cudaStreamSynchronize(gridt.streams[para.stream_num]));
             /* force compute in GPU */
+            if (isforce){
             dot_product_force<<<grid_dot_force,
                                 block_dot_force,
                                 0,
@@ -210,10 +209,9 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                 nwmax,
                 max_size,
                 gridt.psir_size / nwmax);
-            /* force compute in CPU*/
-            cal_force_add(f_s_iat, force, atom_num_grid);
-
+            }
             /*stress compute in GPU*/
+            if (isstress){
             dot_product_stress<<<grid_dot,
                                  block_dot,
                                  0,
@@ -227,17 +225,21 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                 para.psir_dm_device,
                 f_s_iat.stress_device,
                 gridt.psir_size);
+            }
             /* stress compute in CPU*/
-            cal_stress_add(f_s_iat, stress, cuda_block);
+            if (isstress){
+                cal_stress_add(f_s_iat, stress, cuda_block);
+            }
+            if (isforce){
+                cal_force_add(f_s_iat, force, atom_num_grid);
+            }
             iter_num++;
+            delete[] f_s_iat.stress_host;
+            delete[] f_s_iat.force_host;
+            delete[] f_s_iat.iat_host;
         }
     }
-    // cudaFree(f_s_iat.stress_device);
-    // cudaFree(f_s_iat.force_device);
-    // cudaFree(f_s_iat.iat_device);
-    delete[] f_s_iat.stress_host;
-    delete[] f_s_iat.force_host;
-    delete[] f_s_iat.iat_host;
+    delete[] denstiy_mat.density_mat_h;
     /*free variables in CPU host*/
     for (int i = 0; i < gridt.nstreams; i++)
     {
diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.cpp b/source/module_hamilt_lcao/module_gint/grid_technique.cpp
index fdcfdc32eb..2eb74becc1 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.cpp
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.cpp
@@ -127,7 +127,8 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in,
                                   const int& nplane,
                                   const int& startz_current,
                                   const UnitCell& ucell,
-                                  const LCAO_Orbitals& orb)
+                                  const LCAO_Orbitals& orb,
+                                  const int num_stream)
 {
     ModuleBase::TITLE("Grid_Technique", "init");
     ModuleBase::timer::tick("Grid_Technique", "init");
@@ -184,7 +185,7 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in,
 #if ((defined __CUDA) /* || (defined __ROCM) */)
     if(GlobalV::device_flag == "gpu")
     {
-        this->init_gpu_gint_variables(ucell,orb);
+        this->init_gpu_gint_variables(ucell,orb,num_stream);
     }
 #endif
 
@@ -638,12 +639,14 @@ void Grid_Technique::cal_trace_lo(const UnitCell& ucell)
 
 #if ((defined __CUDA) /* || (defined __ROCM) */)
 
-void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb)
+void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb,const int num_stream)
 {
     if (is_malloced)
     {
         free_gpu_gint_variables(this->nat);
     }
+    nstreams = num_stream;
+    streams=new cudaStream_t[nstreams];
     double ylmcoef[100];
     ModuleBase::GlobalFunc::ZEROS(ylmcoef, 100);
     for (int i = 0; i < 100; i++)
diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.h b/source/module_hamilt_lcao/module_gint/grid_technique.h
index 898b0ffb51..b51cb30686 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.h
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.h
@@ -102,7 +102,8 @@ class Grid_Technique : public Grid_MeshBall
                       const int& nplane,
                       const int& startz_current,
                       const UnitCell& ucell,
-                      const LCAO_Orbitals& orb);
+                      const LCAO_Orbitals& orb,
+                      const int num_stream);
 
     /// number of elements(basis-pairs) in this processon
     /// on all adjacent atoms-pairs(Grid division)
@@ -162,8 +163,8 @@ class Grid_Technique : public Grid_MeshBall
     int atom_pair_mesh;
     int atom_pair_nbz;
 
-    const int nstreams = 4;
-    cudaStream_t streams[4];
+    int nstreams=4 ;
+    cudaStream_t* streams;
     // streams[nstreams]
     // TODO it needs to be implemented through configuration files
 
@@ -229,7 +230,7 @@ class Grid_Technique : public Grid_MeshBall
     matrix_multiple_func_type fastest_matrix_mul;
 
   private:
-    void init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb);
+    void init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb,const int num_stream);
     void free_gpu_gint_variables(int nat);
 
 #endif
diff --git a/source/module_hamilt_lcao/module_gint/gtask_force.cpp b/source/module_hamilt_lcao/module_gint/gtask_force.cpp
index 7c30d3db83..e785b4192b 100644
--- a/source/module_hamilt_lcao/module_gint/gtask_force.cpp
+++ b/source/module_hamilt_lcao/module_gint/gtask_force.cpp
@@ -56,7 +56,7 @@ void gpu_task_generator_force(const Grid_Technique& gridt,
                               int& max_m,
                               int& max_n,
                               int& atom_pair_num,
-                              SGridParameter& para)
+                              grid_para& para)
 {
     const int grid_index_ij = i * gridt.nby * gridt.nbzp + j * gridt.nbzp;
     const int nwmax = ucell.nwmax;
diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
index bbc1d7dcb8..a86c16cb5f 100644
--- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
+++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
@@ -253,7 +253,7 @@ __global__ void dot_product_force(double* psir_lx,
     }
 }
 void calculateInit(DensityMat& denstiy_mat,
-                   ForceStressIatGlobal& f_s_iat_dev,
+                   frc_strs_iat_gbl& f_s_iat_dev,
                    hamilt::HContainer<double>* dm,
                    const Grid_Technique& gridt,
                    const UnitCell& ucell,
@@ -300,7 +300,7 @@ void calculateInit(DensityMat& denstiy_mat,
  * @param nbz int,stand for the number of Z-axis
  * @param gridt Grid_Technique,stored the major method in the the gint.
  */
-void para_init(SGridParameter& para,
+void para_init(grid_para& para,
                        int iter_num,
                        int nbz,
                        const Grid_Technique& gridt)
@@ -377,23 +377,23 @@ void para_init(SGridParameter& para,
         = &gridt.ap_output_gbl_g[gridt.atom_pair_nbz * para.stream_num];
 }
 /**
- * @brief ForceStressIat on host and device Init
+ * @brief frc_strs_iat on host and device Init
  *
  * GridParameter init
  *
- * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Host
+ * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Host
  * @param stream_num int , record the stream in GPU
  * @param cuda_block in stress compute,used for Block nums
  * @param atom_num_grid in force calculate,used for Block nums
  * @param max_size Maximum size of atoms on a grid.
- * @param ForceStressIatGlobal ForceStressIatGlobal,contains the Force Stree Iat on Host
+ * @param frc_strs_iat_gbl frc_strs_iat_gbl,contains the Force Stree Iat on Host
  */
-void cal_init(ForceStressIat& f_s_iat,
+void cal_init(frc_strs_iat& f_s_iat,
                         const int stream_num,
                         const int cuda_block,
                         const int atom_num_grid,
                         const int max_size,
-                        const ForceStressIatGlobal& f_s_iat_dev)
+                        frc_strs_iat_gbl& f_s_iat_dev)
 {
     const int iat_min = -max_size - 1;
     f_s_iat.stress_host = new double[6 * cuda_block];
@@ -423,7 +423,7 @@ void cal_init(ForceStressIat& f_s_iat,
  * @param nbz int,stand for the number of Z-axis
  * @param atom_num_grid in force calculate,used for Block nums
  */
-void para_mem_copy(SGridParameter& para,
+void para_mem_copy(grid_para& para,
                          const Grid_Technique& gridt,
                          const int nbz,
                          const int atom_num_grid)
@@ -536,14 +536,14 @@ void para_mem_copy(SGridParameter& para,
 /**
  * @brief Force Stress Force Iat memCpy,from Host to Device
  *
- *  @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device
+ *  @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
  * and Host
  *  @param gridt Grid_Technique,stored the major method in the the gint.
  *  @param atom_num_grid in force calculate,used for Block nums
  *  @param cuda_block in stress compute,used for Block nums
  *  @param stream_num int , record the stream in GPU
  */
-void cal_mem_cpy(ForceStressIat& f_s_iat,
+void cal_mem_cpy(frc_strs_iat& f_s_iat,
                           const Grid_Technique& gridt,
                           const int atom_num_grid,
                           const int cuda_block,
@@ -566,13 +566,13 @@ void cal_mem_cpy(ForceStressIat& f_s_iat,
 /*
  * @brief Force Calculate on Host
  *
- * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device
+ * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
  * and Host
  * @param force stored the force for each atom on each directions
  * @param atom_num_grid in force calculate,used for Block nums
  */
-void cal_force_add(ForceStressIat& f_s_iat,
-                     double* force,
+void cal_force_add(frc_strs_iat& f_s_iat,
+                     std::vector<double>& force,
                     const int atom_num_grid)
 {
     checkCuda(cudaMemcpy(f_s_iat.force_host,
@@ -595,13 +595,13 @@ void cal_force_add(ForceStressIat& f_s_iat,
 /**
  * @brief Stress Calculate on Host
  *
- * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device
+ * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
  * and Host
  * @param stress stored the stress for each directions
  * @param cuda_block in stress compute,used for Block nums
  */
-void cal_stress_add(ForceStressIat& f_s_iat,
-                     double* stress,
+void cal_stress_add(frc_strs_iat& f_s_iat,
+                     std::vector<double>& stress,
                      const int cuda_block)
 {
     checkCuda(cudaMemcpy(f_s_iat.stress_host,
@@ -612,7 +612,6 @@ void cal_stress_add(ForceStressIat& f_s_iat,
     {
         for (int index = 0; index < cuda_block; index++)
         {
-            // printf("the stress is %f\n",stress[i]);
             stress[i] += f_s_iat.stress_host[i * cuda_block + index];
         }
     }