From 4356f9fe3a4ee21d1b181ade58abd3f11f4e88ca Mon Sep 17 00:00:00 2001
From: Mohan Chen <mohan.chen.chen.mohan@gmail.com>
Date: Mon, 8 Apr 2024 09:54:47 +0800
Subject: [PATCH] LCAO refactor step 4 (#3921)

* update timer, add std

* update the format of esolver_fp.cpp

* add description in esolver_fp.h

* update a few esolver files

* update esolver description for LCAO

* update some formats of esolver codes

* update esolver_ks_pw.cpp formats

* update formats of esolver_ks_pw.cpp and esolver_ks_pw.h

* update esolver_ks_lcao_tddft.cpp formats

* update format of esolver_sdft_pw.cpp

* update the format of esolver_sdft_pw_tool.cpp

* keep formating esolver_ks_pw.cpp

* formating esolver_of_interface.cpp

* change GlobalC::ucell to ucell in esolver_ks.cpp

* remove some GlobalC::ucell in esolver_sdft_pw.cpp

* refactor the code before getting rid of RA in esolver_lcao

* refactor before getting rid of LOWF

* refactor before getting rid of LCAO_hamilt.h and LCAO_matrix.h

* refactor wavefunc_in_pw

* refactor density matrix

* refactor the format cal_dm_psi.cpp

* format forces.cpp

* refactor esolver_of_tool.cpp

* change member function beforescf in Esolver to before_scf

* change afterscf to after_scf

* change updatepot to update_pot

* change eachiterinit to iter_init, change eachiterfinish to iter_finish

* refactor esolvers, change member function names of most esolvers

* reformat esolver.h

* update tests for esolvers

* add TITLE in esolver_ks_lcao

* update esolver_ks_lcao

* update esolver_lcao

* update timer::tick in esolver_lcao

* try to delete LCAO_Matrix in LCAO_Hamilt, and try to delete Parallel_Orbitals in Force_k

* fix the compiling issue with LCAO_hamilt.hpp

* try to divide the FORCE_k.cpp into several small files

* divide FORCE_k into foverlap_k.cpp ftvnl_dphi_k.cpp fvl_dphi_k.cpp fvnl_dbeta_k.cpp four files

* get rid of UHM in FORCE_k.cpp

* cannot compile, but I have modified some files in order to get rid of Gint_k and Gint_Gamma in UHM

* keep updating, cannot run

* update write_Vxc, cannot run yet

* keep updating gint_gamma and gint_k

* update LCAO_matrix.cpp

* divide force files, and update Makefile.Objects

* update LCAO_hamilt.cpp

* delete genH pointer in UHM

* divide LCAO_hamilt.cpp into small codes, grid_init.cpp is the first one

* update grid_init in esolver_ks_lcao

* add a new namespace named sparse_format, most of the functions that originally belong to LCAO_hamilt should be moved to sparse_format

* cannot find the mismatch of DFTU

* fix the DFTU error

* update

* enable the test_memory function again by setting calcalculation parameter in INPUT file

* update memory record functions

* add sparse_format_u

* keep refactoring LCAO_hamilt

* rename sparse format files, which are originally defined in LCAO_hamilt.h

* add spar_u.h and spar_u.cpp

* update sparse matrix

* add spar_exx

* update LCAO_hamilt.cpp

* tear down LCAO_hamilt.h and .cpp, DONE.

* keep updating spar

* fix the bugs after deleting LCAO_hamilt

* continue

* fix some errors when compiling

* fix compiling errors in DOS

* fix errors without uhm

* remove directly use of LM in spar_hsr.cpp

* update spa_ files

* keep cleaning the mass left by UHM

* finnaly, all files can be compiled without uhm

* previous commit has problems, now it has been fixed.

* fix makefile bugs

* fix exx compiling errors

* fix exx problems

* update exx

* fix exx

* in some sense, the exx code is a disaster

* let's fix exx again

* fix undefination

* fix a bug in LCAO Refactor Step 4, I accidently forgot to add & for Grid_Driver& _grid in output_mat_sparse.h

---------

Co-authored-by: maki49 <1579492865@qq.com>
---
 source/Makefile.Objects                       |   7 +-
 source/module_elecstate/elecstate_lcao.h      |   2 -
 source/module_esolver/esolver_ks_lcao.cpp     |  13 +-
 source/module_esolver/esolver_ks_lcao.h       |   9 +-
 .../module_esolver/esolver_ks_lcao_elec.cpp   |   9 +-
 .../module_esolver/esolver_ks_lcao_tddft.cpp  |   2 +-
 source/module_esolver/esolver_ks_lcao_tddft.h |   1 -
 .../esolver_ks_lcao_tmpfunc.cpp               |  76 +-
 source/module_hamilt_general/hamilt.h         |  26 +-
 .../hamilt_lcaodft/CMakeLists.txt             |   7 +-
 .../hamilt_lcaodft/FORCE_gamma.cpp            |  15 +-
 .../hamilt_lcaodft/FORCE_gamma.h              |   2 +-
 .../hamilt_lcaodft/FORCE_k.h                  |   2 +-
 .../hamilt_lcaodft/LCAO_hamilt.cpp            | 675 ------------------
 .../hamilt_lcaodft/LCAO_hamilt.h              |  75 --
 .../hamilt_lcaodft/LCAO_hamilt.hpp            |  56 +-
 .../hamilt_lcaodft/hamilt_lcao.h              |   3 +-
 .../hamilt_lcaodft/local_orbital_charge.h     |   2 +-
 .../{sparse_format.cpp => spar_dh.cpp}        |  75 +-
 .../hamilt_lcaodft/spar_dh.h                  |  32 +
 .../hamilt_lcaodft/spar_exx.cpp               |   4 +
 .../hamilt_lcaodft/spar_exx.h                 |  29 +
 .../hamilt_lcaodft/spar_hsr.cpp               | 301 ++++++++
 .../hamilt_lcaodft/spar_hsr.h                 |  41 ++
 .../hamilt_lcaodft/spar_st.cpp                | 195 +++++
 .../hamilt_lcaodft/spar_st.h                  |  37 +
 .../hamilt_lcaodft/spar_u.cpp                 | 246 +++++++
 .../hamilt_lcaodft/spar_u.h                   |  31 +
 .../hamilt_lcaodft/sparse_format.h            |  32 -
 .../module_tddft/evolve_elec.h                |   1 -
 source/module_hsolver/diago_bpcg.cpp          |  40 +-
 source/module_io/dos_nao.cpp                  |  62 +-
 source/module_io/dos_nao.h                    |  32 +-
 source/module_io/mulliken_charge.h            |  19 +-
 source/module_io/output_mat_sparse.cpp        |  42 +-
 source/module_io/output_mat_sparse.h          |  24 +-
 source/module_io/td_current_io.h              |   2 +-
 source/module_io/write_HS_R.cpp               | 201 ++++--
 source/module_io/write_HS_R.h                 |  29 +-
 source/module_io/write_HS_sparse.cpp          |  34 +-
 source/module_io/write_HS_sparse.h            |   9 +-
 source/module_io/write_dos_lcao.cpp           | 111 +--
 source/module_io/write_dos_lcao.h             |   7 +-
 source/module_io/write_proj_band_lcao.cpp     |  79 +-
 source/module_io/write_proj_band_lcao.h       |   4 +-
 45 files changed, 1564 insertions(+), 1137 deletions(-)
 delete mode 100644 source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.cpp
 delete mode 100644 source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h
 rename source/module_hamilt_lcao/hamilt_lcaodft/{sparse_format.cpp => spar_dh.cpp} (75%)
 create mode 100644 source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.h
 create mode 100644 source/module_hamilt_lcao/hamilt_lcaodft/spar_exx.cpp
 create mode 100644 source/module_hamilt_lcao/hamilt_lcaodft/spar_exx.h
 create mode 100644 source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.cpp
 create mode 100644 source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.h
 create mode 100644 source/module_hamilt_lcao/hamilt_lcaodft/spar_st.cpp
 create mode 100644 source/module_hamilt_lcao/hamilt_lcaodft/spar_st.h
 create mode 100644 source/module_hamilt_lcao/hamilt_lcaodft/spar_u.cpp
 create mode 100644 source/module_hamilt_lcao/hamilt_lcaodft/spar_u.h
 delete mode 100644 source/module_hamilt_lcao/hamilt_lcaodft/sparse_format.h

diff --git a/source/Makefile.Objects b/source/Makefile.Objects
index aa1d627b82..f72cc9ae6c 100644
--- a/source/Makefile.Objects
+++ b/source/Makefile.Objects
@@ -501,9 +501,12 @@ OBJS_LCAO=DM_gamma.o\
       fvl_dphi_k.o\
       fvnl_dbeta_k.o\
       LCAO_gen_fixedH.o\
-      LCAO_hamilt.o\
       grid_init.o\
-      sparse_format.o\
+      spar_dh.o\
+      spar_exx.o\
+      spar_hsr.o\
+      spar_st.o\
+      spar_u.o\
       LCAO_matrix.o\
       LCAO_nnr.o\
       center2_orb-orb11.o\
diff --git a/source/module_elecstate/elecstate_lcao.h b/source/module_elecstate/elecstate_lcao.h
index 977e0a2fdb..fdc4dd080f 100644
--- a/source/module_elecstate/elecstate_lcao.h
+++ b/source/module_elecstate/elecstate_lcao.h
@@ -21,7 +21,6 @@ class ElecStateLCAO : public ElecState
                   Local_Orbital_Charge* loc_in ,
                   Gint_Gamma* gint_gamma_in,  //mohan add 2024-04-01
                   Gint_k* gint_k_in, //mohan add 2024-04-01
-                  LCAO_Hamilt* uhm_in ,
                   Local_Orbital_wfc* lowf_in ,
                   ModulePW::PW_Basis* rhopw_in ,
                   ModulePW::PW_Basis_Big* bigpw_in )
@@ -77,7 +76,6 @@ class ElecStateLCAO : public ElecState
     Local_Orbital_Charge* loc = nullptr;
     Gint_Gamma* gint_gamma = nullptr; // mohan add 2024-04-01
     Gint_k* gint_k = nullptr; // mohan add 2024-04-01
-    //LCAO_Hamilt* uhm = nullptr; // mohan modify 2024-04-01
     Local_Orbital_wfc* lowf = nullptr;
     DensityMatrix<TK,double>* DM = nullptr;
     
diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp
index c278f7e4c6..5b4c963403 100644
--- a/source/module_esolver/esolver_ks_lcao.cpp
+++ b/source/module_esolver/esolver_ks_lcao.cpp
@@ -117,7 +117,6 @@ void ESolver_KS_LCAO<TK, TR>::init(Input& inp, UnitCell& ucell)
             &(this->LOC),
             &(this->GG), // mohan add 2024-04-01
             &(this->GK), // mohan add 2024-04-01
-            &(this->uhm),
             &(this->LOWF),
             this->pw_rho,
             this->pw_big);
@@ -131,7 +130,7 @@ void ESolver_KS_LCAO<TK, TR>::init(Input& inp, UnitCell& ucell)
     //------------------init Basis_lcao----------------------
 
     //! pass Hamilt-pointer to Operator
-    this->gen_h.LM = this->uhm.LM = &this->LM;
+    this->gen_h.LM = &this->LM;
 
     //! pass basis-pointer to EState and Psi
     this->LOC.ParaV = this->LOWF.ParaV = this->LM.ParaV = &(this->orb_con.ParaV);
@@ -259,7 +258,6 @@ void ESolver_KS_LCAO<TK, TR>::init_after_vc(Input& inp, UnitCell& ucell)
 				&(this->LOC),
                 &(this->GG), // mohan add 2024-04-01
                 &(this->GK), // mohan add 2024-04-01
-				&(this->uhm),
 				&(this->LOWF),
 				this->pw_rho,
 				this->pw_big);
@@ -426,7 +424,7 @@ void ESolver_KS_LCAO<TK, TR>::post_process(void)
     {
         ModuleIO::write_proj_band_lcao(
             this->psi,
-            this->uhm,
+            this->LM,
             this->pelec,
             this->kv,
             GlobalC::ucell,
@@ -437,7 +435,8 @@ void ESolver_KS_LCAO<TK, TR>::post_process(void)
     {
 		ModuleIO::out_dos_nao(
 				this->psi,
-				this->uhm,
+                this->LM,
+                this->orb_con.ParaV,
 				this->pelec->ekb,
 				this->pelec->wg,
 				INPUT.dos_edelta_ev,
@@ -1204,10 +1203,10 @@ ModuleIO::Output_Mat_Sparse<TK> ESolver_KS_LCAO<TK, TR>::create_Output_Mat_Spars
 			istep,
 			this->pelec->pot->get_effective_v(),
 			*this->LOWF.ParaV,
-			this->uhm,
-            this->gen_h, // mohan add 2024-04-02
+            this->gen_h, // mohan add 2024-04-06
             this->GK, // mohan add 2024-04-01
 			this->LM,
+            GlobalC::GridD, // mohan add 2024-04-06
 			this->kv,
 			this->p_hamilt);
 }
diff --git a/source/module_esolver/esolver_ks_lcao.h b/source/module_esolver/esolver_ks_lcao.h
index 6e31bacd39..f579579cea 100644
--- a/source/module_esolver/esolver_ks_lcao.h
+++ b/source/module_esolver/esolver_ks_lcao.h
@@ -5,7 +5,6 @@
 #include "module_hamilt_lcao/hamilt_lcaodft/record_adj.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h"
 // for grid integration
 #include "module_hamilt_lcao/module_gint/gint_gamma.h"
 #include "module_hamilt_lcao/module_gint/gint_k.h"
@@ -76,9 +75,6 @@ namespace ModuleESolver
         // we will get rid of this class soon, don't use it, mohan 2024-03-28
         Local_Orbital_Charge LOC;
 
-        // we will get rid of this class soon, don't use it, mohan 2024-03-28
-        LCAO_Hamilt uhm;
-
         LCAO_gen_fixedH gen_h; // mohan add 2024-04-02
 
         // used for k-dependent grid integration.
@@ -106,6 +102,7 @@ namespace ModuleESolver
         //--------------common for all calculation, not only scf-------------
         // set matrix and grid integral
         void set_matrix_grid(Record_adj& ra);
+
         void beforesolver(const int istep);
         //----------------------------------------------------------------------
 
@@ -127,11 +124,15 @@ namespace ModuleESolver
         std::shared_ptr<Exx_LRI<double>> exx_lri_double = nullptr;
         std::shared_ptr<Exx_LRI<std::complex<double>>> exx_lri_complex = nullptr;
 #endif
+
     private:
+
         // tmp interfaces  before sub-modules are refactored
         void dftu_cal_occup_m(const int& iter, const std::vector<std::vector<TK>>& dm) const;
+
 #ifdef __DEEPKS
         void dpks_cal_e_delta_band(const std::vector<std::vector<TK>>& dm) const;
+
         void dpks_cal_projected_DM(const elecstate::DensityMatrix<TK, double>* dm) const;
 #endif
 
diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp
index e33f817073..adf4dd1798 100644
--- a/source/module_esolver/esolver_ks_lcao_elec.cpp
+++ b/source/module_esolver/esolver_ks_lcao_elec.cpp
@@ -494,7 +494,9 @@ void ESolver_KS_LCAO<std::complex<double>, double>::get_S(void)
         dynamic_cast<hamilt::OperatorLCAO<std::complex<double>, double>*>(this->p_hamilt->ops)->contributeHR();
     }
 
-    ModuleIO::output_S_R(this->uhm, this->p_hamilt, "SR.csr");
+    ModuleIO::output_SR(orb_con.ParaV, this->LM, GlobalC::GridD, this->p_hamilt, "SR.csr");
+
+    return;
 }
 
 
@@ -525,7 +527,10 @@ void ESolver_KS_LCAO<std::complex<double>, std::complex<double>>::get_S(void)
         dynamic_cast<hamilt::OperatorLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt->ops)
             ->contributeHR();
     }
-    ModuleIO::output_S_R(this->uhm, this->p_hamilt, "SR.csr");
+
+    ModuleIO::output_SR(orb_con.ParaV, this->LM, GlobalC::GridD, this->p_hamilt, "SR.csr");
+
+    return;
 }
 
 
diff --git a/source/module_esolver/esolver_ks_lcao_tddft.cpp b/source/module_esolver/esolver_ks_lcao_tddft.cpp
index f104a2454c..dfd7a79c96 100644
--- a/source/module_esolver/esolver_ks_lcao_tddft.cpp
+++ b/source/module_esolver/esolver_ks_lcao_tddft.cpp
@@ -100,7 +100,7 @@ void ESolver_KS_LCAO_TDDFT::init(Input& inp, UnitCell& ucell)
     //------------------init Hamilt_lcao----------------------
 
     // pass Hamilt-pointer to Operator
-    this->gen_h.LM = this->uhm.LM = &this->LM;
+    this->gen_h.LM = &this->LM;
     // pass basis-pointer to EState and Psi
     this->LOC.ParaV = this->LOWF.ParaV = this->LM.ParaV;
 
diff --git a/source/module_esolver/esolver_ks_lcao_tddft.h b/source/module_esolver/esolver_ks_lcao_tddft.h
index 8e1f4a2c89..44011309f2 100644
--- a/source/module_esolver/esolver_ks_lcao_tddft.h
+++ b/source/module_esolver/esolver_ks_lcao_tddft.h
@@ -4,7 +4,6 @@
 #include "esolver_ks_lcao.h"
 #include "module_basis/module_ao/ORB_control.h"
 #include "module_psi/psi.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/record_adj.h"
diff --git a/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp b/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp
index 239444ef9d..a8ffb911f7 100644
--- a/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp
+++ b/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp
@@ -4,62 +4,106 @@
 #include "module_hamilt_lcao/module_deepks/LCAO_deepks.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
 #endif
+
 namespace ModuleESolver
 {
+
+    using namespace std;
+
+    //! dftu occupation matrix for gamma only using dm(double)
     template <>
-    void ESolver_KS_LCAO<double, double>::dftu_cal_occup_m(const int& iter, const std::vector<std::vector<double>>& dm)const
+    void ESolver_KS_LCAO<double, double>::dftu_cal_occup_m(
+         const int& iter, 
+         const vector<vector<double>>& dm)const
     {
-        GlobalC::dftu.cal_occup_m_gamma(iter, dm, this->p_chgmix->get_mixing_beta());
+		GlobalC::dftu.cal_occup_m_gamma(
+				iter, 
+				dm, 
+				this->p_chgmix->get_mixing_beta());
     }
 
+    //! dftu occupation matrix for multiple k-points using dm(complex)
     template <>
-    void ESolver_KS_LCAO<std::complex<double>, double>::dftu_cal_occup_m(const int& iter, const std::vector<std::vector<std::complex<double>>>& dm)const
+    void ESolver_KS_LCAO<complex<double>, double>::dftu_cal_occup_m(
+       const int& iter, 
+       const vector<vector<complex<double>>>& dm)const
     {
-        GlobalC::dftu.cal_occup_m_k(iter, dm, this->kv, this->p_chgmix->get_mixing_beta(), this->p_hamilt);
+        GlobalC::dftu.cal_occup_m_k(
+                 iter, 
+                 dm, 
+                 this->kv, 
+                 this->p_chgmix->get_mixing_beta(), 
+                 this->p_hamilt);
     }
+
+    //! dftu occupation matrix
     template <>
-    void ESolver_KS_LCAO<std::complex<double>, std::complex<double>>::dftu_cal_occup_m(const int& iter, const std::vector<std::vector<std::complex<double>>>& dm)const
+    void ESolver_KS_LCAO<complex<double>, complex<double>>::dftu_cal_occup_m(
+       const int& iter, 
+       const vector<vector<complex<double>>>& dm)const
     {
-        GlobalC::dftu.cal_occup_m_k(iter, dm, this->kv, this->p_chgmix->get_mixing_beta(), this->p_hamilt);
+        GlobalC::dftu.cal_occup_m_k(
+                 iter, 
+                 dm, 
+                 this->kv, 
+                 this->p_chgmix->get_mixing_beta(), 
+                 this->p_hamilt);
     }
+
 #ifdef __DEEPKS
     template<>
-    void ESolver_KS_LCAO<double, double>::dpks_cal_e_delta_band(const std::vector<std::vector<double>>& dm)const
+    void ESolver_KS_LCAO<double, double>::dpks_cal_e_delta_band(
+       const vector<vector<double>>& dm)const
     {
         GlobalC::ld.cal_e_delta_band(dm);
     }
+
+
     template<>
-    void ESolver_KS_LCAO<std::complex<double>, double>::dpks_cal_e_delta_band(const std::vector<std::vector<std::complex<double>>>& dm)const
+    void ESolver_KS_LCAO<complex<double>, double>::dpks_cal_e_delta_band(
+        const vector<vector<complex<double>>>& dm)const
     {
         GlobalC::ld.cal_e_delta_band_k(dm, this->kv.nks);
     }
+
+
     template<>
-    void ESolver_KS_LCAO<std::complex<double>, std::complex<double>>::dpks_cal_e_delta_band(const std::vector<std::vector<std::complex<double>>>& dm)const
+    void ESolver_KS_LCAO<complex<double>, complex<double>>::dpks_cal_e_delta_band(
+        const vector<vector<complex<double>>>& dm)const
     {
         GlobalC::ld.cal_e_delta_band_k(dm, this->kv.nks);
     }
+
+
     template<>
-    void ESolver_KS_LCAO<double, double>::dpks_cal_projected_DM(const elecstate::DensityMatrix<double, double>* dm)const
+    void ESolver_KS_LCAO<double, double>::dpks_cal_projected_DM(
+        const elecstate::DensityMatrix<double, double>* dm)const
     {
-        GlobalC::ld.cal_projected_DM(dm, //this->LOC.dm_gamma,
+        GlobalC::ld.cal_projected_DM(dm,
             GlobalC::ucell,
             GlobalC::ORB,
             GlobalC::GridD);
     }
+
+
     template<>
-    void ESolver_KS_LCAO<std::complex<double>, double>::dpks_cal_projected_DM(const elecstate::DensityMatrix<std::complex<double>, double>* dm)const
+    void ESolver_KS_LCAO<complex<double>, double>::dpks_cal_projected_DM(
+          const elecstate::DensityMatrix<complex<double>, double>* dm)const
     {
-        GlobalC::ld.cal_projected_DM_k(dm, //this->LOC.dm_k,
+        GlobalC::ld.cal_projected_DM_k(dm,
             GlobalC::ucell,
             GlobalC::ORB,
             GlobalC::GridD,
             this->kv.nks,
             this->kv.kvec_d);
     }
+
+
     template<>
-    void ESolver_KS_LCAO<std::complex<double>, std::complex<double>>::dpks_cal_projected_DM(const elecstate::DensityMatrix<std::complex<double>, double>* dm)const
+    void ESolver_KS_LCAO<complex<double>, complex<double>>::dpks_cal_projected_DM(
+          const elecstate::DensityMatrix<complex<double>, double>* dm)const
     {
-        GlobalC::ld.cal_projected_DM_k(dm, //this->LOC.dm_k,
+        GlobalC::ld.cal_projected_DM_k(dm,
             GlobalC::ucell,
             GlobalC::ORB,
             GlobalC::GridD,
@@ -67,4 +111,4 @@ namespace ModuleESolver
             this->kv.kvec_d);
     }
 #endif
-}
\ No newline at end of file
+}
diff --git a/source/module_hamilt_general/hamilt.h b/source/module_hamilt_general/hamilt.h
index 8ddd1e3e15..b83f089cda 100644
--- a/source/module_hamilt_general/hamilt.h
+++ b/source/module_hamilt_general/hamilt.h
@@ -21,10 +21,17 @@ class Hamilt
     virtual void updateHk(const int ik){return;}
 
     /// refresh status of Hamiltonian, for example, refresh H(R) and S(R) in LCAO case
-    virtual void refresh(){return;}
+    virtual void refresh(void){return;}
 
     /// core function: for solving eigenvalues of Hamiltonian with iterative method
-    virtual void hPsi(const T* psi_in, T* hpsi, const size_t size) const{return;}
+	virtual void hPsi(
+			const T* psi_in, 
+			T* hpsi, 
+			const size_t size) const
+	{
+		return;
+	}
+
     virtual void sPsi(const T* psi_in, // psi
                       T* spsi,         // spsi
                       const int nrow,  // dimension of spsi: nbands * nrow
@@ -35,9 +42,14 @@ class Hamilt
         syncmem_op()(this->ctx, this->ctx, spsi, psi_in, static_cast<size_t>(nbands * nrow));
     }
 
-    /// core function: return H(k) and S(k) matrixs for direct solving eigenvalues.
-    virtual void matrix(MatrixBlock<std::complex<double>> &hk_in, MatrixBlock<std::complex<double>> &sk_in){return;}
-    virtual void matrix(MatrixBlock<double> &hk_in, MatrixBlock<double> &sk_in){return;}
+	/// core function: return H(k) and S(k) matrixs for direct solving eigenvalues.
+	virtual void matrix(
+			MatrixBlock<std::complex<double>> &hk_in, 
+			MatrixBlock<std::complex<double>> &sk_in){return;}
+
+	virtual void matrix(
+			MatrixBlock<double> &hk_in, 
+			MatrixBlock<double> &sk_in){return;}
 
     std::string classname = "none";
 
@@ -45,11 +57,13 @@ class Hamilt
 
     /// first node operator, add operations from each operators
     Operator<T, Device>* ops = nullptr;
+
 protected:
+
     Device* ctx = {};
     using syncmem_op = psi::memory::synchronize_memory_op<T, Device, Device>;
 };
 
 } // namespace hamilt
 
-#endif
\ No newline at end of file
+#endif
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/CMakeLists.txt b/source/module_hamilt_lcao/hamilt_lcaodft/CMakeLists.txt
index 2de30cca69..ccd1485419 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/CMakeLists.txt
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/CMakeLists.txt
@@ -26,9 +26,12 @@ if(ENABLE_LCAO)
         fvl_dphi_k.cpp
         fvnl_dbeta_k.cpp
         LCAO_gen_fixedH.cpp
-        LCAO_hamilt.cpp
         grid_init.cpp
-        sparse_format.cpp
+        spar_dh.cpp
+        spar_exx.cpp
+        spar_hsr.cpp
+        spar_st.cpp
+        spar_u.cpp
         LCAO_matrix.cpp
         LCAO_nnr.cpp
         record_adj.cpp
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
index f54a0ca85c..5168bd0759 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
@@ -7,7 +7,6 @@
 #ifdef __DEEPKS
 #include "module_hamilt_lcao/module_deepks/LCAO_deepks.h" //caoyu add for deepks on 20210813
 #endif
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h"
 #include "module_io/write_HS.h"
 #include "module_elecstate/elecstate_lcao.h"
 
@@ -61,8 +60,9 @@ void Force_LCAO_gamma::ftable_gamma(const bool isforce,
 
     // sum up the density matrix with different spin
     // DM->sum_DMR_spin();
-    //
+    
     this->cal_ftvnl_dphi(DM, lm, isforce, isstress, ftvnl_dphi, stvnl_dphi);
+
     this->cal_fvnl_dbeta(DM, isforce, isstress, fvnl_dbeta, svnl_dbeta);
 
     this->cal_fvl_dphi(loc.DM, isforce, isstress, pelec->pot, gint_gamma, fvl_dphi, svl_dphi);
@@ -75,8 +75,14 @@ void Force_LCAO_gamma::ftable_gamma(const bool isforce,
         GlobalC::ld.cal_projected_DM(DM, GlobalC::ucell, GlobalC::ORB, GlobalC::GridD);
         GlobalC::ld.cal_descriptor();
         GlobalC::ld.cal_gedm(GlobalC::ucell.nat);
-        GlobalC::ld
-            .cal_f_delta_gamma(dm_gamma, GlobalC::ucell, GlobalC::ORB, GlobalC::GridD, isstress, svnl_dalpha);
+        GlobalC::ld.cal_f_delta_gamma(
+				dm_gamma, 
+				GlobalC::ucell, 
+				GlobalC::ORB, 
+				GlobalC::GridD, 
+				isstress, 
+				svnl_dalpha);
+
 #ifdef __MPI
         Parallel_Reduce::reduce_all(GlobalC::ld.F_delta.c, GlobalC::ld.F_delta.nr * GlobalC::ld.F_delta.nc);
         if (isstress)
@@ -84,6 +90,7 @@ void Force_LCAO_gamma::ftable_gamma(const bool isforce,
             Parallel_Reduce::reduce_pool(svnl_dalpha.c, svnl_dalpha.nr * svnl_dalpha.nc);
         }
 #endif
+
         if (GlobalV::deepks_out_unittest)
         {
             GlobalC::ld.print_dm(dm_gamma[0]);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.h b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.h
index d1c99b4dca..440a92217c 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.h
@@ -5,8 +5,8 @@
 #include "module_base/global_variable.h"
 #include "module_base/matrix.h"
 #include "module_elecstate/module_dm/density_matrix.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h"
 #include "module_psi/psi.h"
 #include "module_hamilt_lcao/module_gint/gint_gamma.h"
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.h b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.h
index 34f09eadaf..b84ab380ad 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.h
@@ -6,8 +6,8 @@
 #include "module_base/global_variable.h"
 #include "module_base/matrix.h"
 #include "module_elecstate/module_dm/density_matrix.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h"
 #include "module_hamilt_lcao/module_gint/gint_k.h"
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.cpp
deleted file mode 100644
index 998311df6c..0000000000
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.cpp
+++ /dev/null
@@ -1,675 +0,0 @@
-#include "LCAO_hamilt.h"
-
-#include "module_base/parallel_reduce.h"
-#include "module_cell/module_neighbor/sltk_atom_arrange.h"
-#include "module_cell/module_neighbor/sltk_grid_driver.h"
-#include "module_hamilt_general/module_xc/xc_functional.h"
-#include "module_hamilt_lcao/module_dftu/dftu.h"
-#include "module_hamilt_pw/hamilt_pwdft/global.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
-#ifdef __DEEPKS
-#include "module_hamilt_lcao/module_deepks/LCAO_deepks.h"	//caoyu add 2021-07-26
-#endif
-#include "module_base/timer.h"
-
-#ifdef __EXX
-#include "LCAO_hamilt.hpp"
-#endif
-
-#include "sparse_format.h"
-
-using namespace sparse_format;
-
-LCAO_Hamilt::LCAO_Hamilt()
-{
-}
-
-LCAO_Hamilt::~LCAO_Hamilt()
-{
-    if(GlobalV::test_deconstructor)
-    {
-        std::cout << " ~LCAO_Hamilt()" << std::endl;
-    }
-}
-
-
-#include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
-void LCAO_Hamilt::cal_HContainer_sparse_d(
-		const int &current_spin, 
-		const double &sparse_threshold, 
-		const hamilt::HContainer<double>& hR, 
-		std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, double>>>& target)
-{
-    ModuleBase::TITLE("LCAO_Hamilt","cal_HContainer_sparse_d");
-
-    const Parallel_Orbitals* paraV = this->LM->ParaV;
-    auto row_indexes = paraV->get_indexes_row();
-    auto col_indexes = paraV->get_indexes_col();
-    for(int iap=0;iap<hR.size_atom_pairs();++iap)
-    {
-        int atom_i = hR.get_atom_pair(iap).get_atom_i();
-        int atom_j = hR.get_atom_pair(iap).get_atom_j();
-        int start_i = paraV->atom_begin_row[atom_i];
-        int start_j = paraV->atom_begin_col[atom_j];
-        int row_size = paraV->get_row_size(atom_i);
-        int col_size = paraV->get_col_size(atom_j);
-        for(int iR=0;iR<hR.get_atom_pair(iap).get_R_size();++iR)
-        {
-            auto& matrix = hR.get_atom_pair(iap).get_HR_values(iR);
-            int* r_index = hR.get_atom_pair(iap).get_R_index(iR);
-            Abfs::Vector3_Order<int> dR(r_index[0], r_index[1], r_index[2]);
-            for(int i=0;i<row_size;++i)
-            {
-                int mu = row_indexes[start_i+i];
-                for(int j=0;j<col_size;++j)
-                {
-                    int nu = col_indexes[start_j+j];
-                    const auto& value_tmp = matrix.get_value(i,j);
-                    if(std::abs(value_tmp)>sparse_threshold)
-                    {
-                        target[dR][mu][nu] = value_tmp;
-                    }
-                }
-            }
-        }
-    }
-
-    return;
-}
-
-void LCAO_Hamilt::cal_HContainer_sparse_cd(
-		const int &current_spin, 
-		const double &sparse_threshold, 
-		const hamilt::HContainer<std::complex<double>>& hR, 
-		std::map<Abfs::Vector3_Order<int>, 
-		std::map<size_t, std::map<size_t, std::complex<double>>>>& target)
-{
-    ModuleBase::TITLE("LCAO_Hamilt","cal_HContainer_sparse_cd");
-
-    const Parallel_Orbitals* paraV = this->LM->ParaV;
-    auto row_indexes = paraV->get_indexes_row();
-    auto col_indexes = paraV->get_indexes_col();
-    for(int iap=0;iap<hR.size_atom_pairs();++iap)
-    {
-        int atom_i = hR.get_atom_pair(iap).get_atom_i();
-        int atom_j = hR.get_atom_pair(iap).get_atom_j();
-        int start_i = paraV->atom_begin_row[atom_i];
-        int start_j = paraV->atom_begin_col[atom_j];
-        int row_size = paraV->get_row_size(atom_i);
-        int col_size = paraV->get_col_size(atom_j);
-        for(int iR=0;iR<hR.get_atom_pair(iap).get_R_size();++iR)
-        {
-            auto& matrix = hR.get_atom_pair(iap).get_HR_values(iR);
-            int* r_index = hR.get_atom_pair(iap).get_R_index(iR);
-            Abfs::Vector3_Order<int> dR(r_index[0], r_index[1], r_index[2]);
-            for(int i=0;i<row_size;++i)
-            {
-                int mu = row_indexes[start_i+i];
-                for(int j=0;j<col_size;++j)
-                {
-                    int nu = col_indexes[start_j+j];
-                    const auto& value_tmp = matrix.get_value(i,j);
-                    if(std::abs(value_tmp)>sparse_threshold)
-                    {
-                        target[dR][mu][nu] = value_tmp;
-                    }
-                }
-            }
-        }
-    }
-
-    return;
-}
-
-void LCAO_Hamilt::cal_HSR_sparse(
-		const int &current_spin, 
-		const double &sparse_threshold, 
-		const int (&nmp)[3], 
-		hamilt::Hamilt<std::complex<double>>* p_ham)
-{
-    ModuleBase::TITLE("LCAO_Hamilt","cal_HSR_sparse");
-
-    sparse_format::set_R_range(*this->LM);
-
-    //cal_STN_R_sparse(current_spin, sparse_threshold);
-    if(GlobalV::NSPIN!=4)
-    {
-        hamilt::HamiltLCAO<std::complex<double>, double>* p_ham_lcao = 
-        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham);
-
-		this->cal_HContainer_sparse_d(current_spin, 
-				sparse_threshold, 
-				*(p_ham_lcao->getHR()), 
-				this->LM->HR_sparse[current_spin]);
-
-		this->cal_HContainer_sparse_d(current_spin, 
-				sparse_threshold, 
-				*(p_ham_lcao->getSR()), 
-				this->LM->SR_sparse);
-    }
-    else
-    {
-        hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>* p_ham_lcao = 
-        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham);
-
-        this->cal_HContainer_sparse_cd(current_spin, 
-        sparse_threshold, 
-        *(p_ham_lcao->getHR()), 
-        this->LM->HR_soc_sparse);
-
-        this->cal_HContainer_sparse_cd(current_spin, 
-        sparse_threshold, 
-        *(p_ham_lcao->getSR()), 
-        this->LM->SR_soc_sparse);
-    }
-
-    // only old DFT+U method need to cal extra contribution to HR
-    if (GlobalV::dft_plus_u == 2)
-    {
-        if (GlobalV::NSPIN != 4)
-        {
-            cal_HR_dftu_sparse(current_spin, sparse_threshold);
-        }
-        else
-        {
-            cal_HR_dftu_soc_sparse(current_spin, sparse_threshold);
-        }
-    }
-
-#ifdef __EXX
-#ifdef __MPI
-    if( GlobalC::exx_info.info_global.cal_exx )
-    {
-		if(GlobalC::exx_info.info_ri.real_number)
-		{
-			this->cal_HR_exx_sparse(current_spin, sparse_threshold, nmp, *this->LM->Hexxd);
-		}
-		else
-		{
-			this->cal_HR_exx_sparse(current_spin, sparse_threshold, nmp, *this->LM->Hexxc);
-		}
-	}
-#endif // __MPI
-#endif // __EXX
-
-    clear_zero_elements(current_spin, sparse_threshold);
-}
-
-void LCAO_Hamilt::cal_STN_R_sparse_for_T(const double &sparse_threshold)
-{
-    ModuleBase::TITLE("LCAO_Hamilt","cal_STN_R_sparse_for_T");
-
-    int index = 0;
-    ModuleBase::Vector3<double> dtau, tau1, tau2;
-    ModuleBase::Vector3<double> dtau1, dtau2, tau0;
-
-    double tmp=0.0;
-    std::complex<double> tmpc=complex<double>(0.0,0.0);
-
-    for(int T1 = 0; T1 < GlobalC::ucell.ntype; ++T1)
-    {
-        Atom* atom1 = &GlobalC::ucell.atoms[T1];
-        for(int I1 = 0; I1 < atom1->na; ++I1)
-        {
-            tau1 = atom1->tau[I1];
-            GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1);
-            Atom* atom1 = &GlobalC::ucell.atoms[T1];
-            const int start = GlobalC::ucell.itiaiw2iwt(T1,I1,0);
-
-            for(int ad = 0; ad < GlobalC::GridD.getAdjacentNum()+1; ++ad)
-            {
-                const int T2 = GlobalC::GridD.getType(ad);
-                const int I2 = GlobalC::GridD.getNatom(ad);
-                Atom* atom2 = &GlobalC::ucell.atoms[T2];
-
-                tau2 = GlobalC::GridD.getAdjacentTau(ad);
-                dtau = tau2 - tau1;
-                double distance = dtau.norm() * GlobalC::ucell.lat0;
-                double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
-
-                bool adj = false;
-
-                if(distance < rcut) adj = true;
-                else if(distance >= rcut)
-                {
-                    for(int ad0 = 0; ad0 < GlobalC::GridD.getAdjacentNum()+1; ++ad0)
-                    {
-                        const int T0 = GlobalC::GridD.getType(ad0);
-
-                        tau0 = GlobalC::GridD.getAdjacentTau(ad0);
-                        dtau1 = tau0 - tau1;
-                        dtau2 = tau0 - tau2;
-
-                        double distance1 = dtau1.norm() * GlobalC::ucell.lat0;
-                        double distance2 = dtau2.norm() * GlobalC::ucell.lat0;
-
-                        double rcut1 = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ucell.infoNL.Beta[T0].get_rcut_max();
-                        double rcut2 = GlobalC::ORB.Phi[T2].getRcut() + GlobalC::ucell.infoNL.Beta[T0].get_rcut_max();
-
-                        if( distance1 < rcut1 && distance2 < rcut2 )
-                        {
-                            adj = true;
-                            break;
-                        }
-                    }
-                }
-
-                if(adj)
-                {
-                    const int start2 = GlobalC::ucell.itiaiw2iwt(T2,I2,0);
-
-					Abfs::Vector3_Order<int> dR(
-							GlobalC::GridD.getBox(ad).x, 
-							GlobalC::GridD.getBox(ad).y, 
-							GlobalC::GridD.getBox(ad).z);
-
-                    for(int ii=0; ii<atom1->nw*GlobalV::NPOL; ii++)
-                    {
-                        const int iw1_all = start + ii;
-                        const int mu = this->LM->ParaV->global2local_row(iw1_all);
-
-                        if(mu<0)continue;
-
-                        for(int jj=0; jj<atom2->nw*GlobalV::NPOL; jj++)
-                        {
-                            int iw2_all = start2 + jj;
-                            const int nu = this->LM->ParaV->global2local_col(iw2_all);
-
-                            if(nu<0)continue;
-
-                            if(GlobalV::NSPIN!=4)
-                            {
-                                tmp = this->LM->Hloc_fixedR[index];
-                                if (std::abs(tmp) > sparse_threshold)
-                                {
-                                    this->LM->TR_sparse[dR][iw1_all][iw2_all] = tmp;
-                                }
-                            }
-                            else
-                            {
-                                tmpc = this->LM->Hloc_fixedR_soc[index];
-                                if(std::abs(tmpc) > sparse_threshold)
-                                {
-                                    this->LM->TR_soc_sparse[dR][iw1_all][iw2_all] = tmpc;
-                                }
-                            }
-
-                            ++index;
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    return;
-}
-
-void LCAO_Hamilt::cal_SR_sparse(const double &sparse_threshold, hamilt::Hamilt<std::complex<double>>* p_ham)
-{
-    ModuleBase::TITLE("LCAO_Hamilt","cal_SR_sparse");
-    sparse_format::set_R_range(*this->LM);
-    //cal_STN_R_sparse(current_spin, sparse_threshold);
-    if(GlobalV::NSPIN!=4)
-    {
-        hamilt::HamiltLCAO<std::complex<double>, double>* p_ham_lcao 
-        = dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham);
-        this->cal_HContainer_sparse_d(0, sparse_threshold, *(p_ham_lcao->getSR()), this->LM->SR_sparse);
-    }
-    else
-    {
-        hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>* p_ham_lcao 
-        = dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham);
-        this->cal_HContainer_sparse_cd(0, sparse_threshold, *(p_ham_lcao->getSR()), this->LM->SR_soc_sparse);
-    }
-}
-
-void LCAO_Hamilt::cal_TR_sparse(
-		LCAO_gen_fixedH &gen_h,
-		const double &sparse_threshold)
-{
-    ModuleBase::TITLE("LCAO_Hamilt","cal_TR_sparse");
-    
-    //need to rebuild T(R)
-    this->LM->Hloc_fixedR.resize(this->LM->ParaV->nnr);
-    this->LM->zeros_HSR('T');
-
-    gen_h.build_ST_new('T', 0, GlobalC::ucell, this->LM->Hloc_fixedR.data());
-
-    sparse_format::set_R_range(*this->LM);
-    this->cal_STN_R_sparse_for_T(sparse_threshold);
-
-    return;
-}
-
-void LCAO_Hamilt::cal_HR_dftu_sparse(const int &current_spin, const double &sparse_threshold)
-{
-    ModuleBase::TITLE("LCAO_Hamilt","cal_HR_dftu_sparse");
-    ModuleBase::timer::tick("LCAO_Hamilt","cal_HR_dftu_sparse");
-
-    int total_R_num = this->LM->all_R_coor.size();
-    int *nonzero_num = new int[total_R_num];
-    ModuleBase::GlobalFunc::ZEROS(nonzero_num, total_R_num);
-    int count = 0;
-    for (auto &R_coor : this->LM->all_R_coor)
-    {
-        auto iter = this->LM->SR_sparse.find(R_coor);
-        if (iter != this->LM->SR_sparse.end())
-        {
-            for (auto &row_loop : iter->second)
-            {
-                nonzero_num[count] += row_loop.second.size();
-            }
-        }
-        count++;
-    }
-
-    Parallel_Reduce::reduce_all(nonzero_num, total_R_num);
-
-    double *HR_tmp = new double[this->LM->ParaV->nloc];
-    double *SR_tmp = new double[this->LM->ParaV->nloc];
-
-    int ir=0;
-    int ic=0;
-    int iic=0;
-    auto &temp_HR_sparse = this->LM->HR_sparse[current_spin];
-
-    count = 0;
-    for (auto &R_coor : this->LM->all_R_coor)
-    {
-        if (nonzero_num[count] != 0)
-        {
-            ModuleBase::GlobalFunc::ZEROS(HR_tmp, this->LM->ParaV->nloc);
-            ModuleBase::GlobalFunc::ZEROS(SR_tmp, this->LM->ParaV->nloc);
-
-            auto iter = this->LM->SR_sparse.find(R_coor);
-            if (iter != this->LM->SR_sparse.end())
-            {
-                for (auto &row_loop : iter->second)
-                {
-                    ir = this->LM->ParaV->global2local_row(row_loop.first);
-                    for (auto &col_loop : row_loop.second)
-                    {
-                        ic = this->LM->ParaV->global2local_col(col_loop.first);
-                        if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
-                        {
-                            iic = ir + ic * this->LM->ParaV->nrow;
-                        }
-                        else
-                        {
-                            iic = ir * this->LM->ParaV->ncol + ic;
-                        }
-                        SR_tmp[iic] = col_loop.second;
-                    }
-                }
-            }
-
-            GlobalC::dftu.cal_eff_pot_mat_R_double(current_spin, SR_tmp, HR_tmp);
-
-            for (int i = 0; i < GlobalV::NLOCAL; ++i)
-            {
-                ir = this->LM->ParaV->global2local_row(i);
-                if (ir >= 0)
-                {
-                    for (int j = 0; j < GlobalV::NLOCAL; ++j)
-                    {
-                        ic = this->LM->ParaV->global2local_col(j);
-                        if (ic >= 0)
-                        {
-                            if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
-                            {
-                                iic = ir + ic * this->LM->ParaV->nrow;
-                            }
-                            else
-                            {
-                                iic = ir * this->LM->ParaV->ncol + ic;
-                            }
-
-                            if (std::abs(HR_tmp[iic]) > sparse_threshold)
-                            {
-                                double &value = temp_HR_sparse[R_coor][i][j];
-                                value += HR_tmp[iic];
-                                if (std::abs(value) <= sparse_threshold)
-                                {
-                                    temp_HR_sparse[R_coor][i].erase(j);
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-
-        }
-
-        count++;
-    }
-
-    delete[] nonzero_num;
-    delete[] HR_tmp;
-    delete[] SR_tmp;
-    nonzero_num = nullptr;
-    HR_tmp = nullptr;
-    SR_tmp = nullptr;
-
-    ModuleBase::timer::tick("LCAO_Hamilt","cal_HR_dftu_sparse");
-
-}
-
-void LCAO_Hamilt::cal_HR_dftu_soc_sparse(const int &current_spin, const double &sparse_threshold)
-{
-    ModuleBase::TITLE("LCAO_Hamilt","cal_HR_dftu_soc_sparse");
-    ModuleBase::timer::tick("LCAO_Hamilt","cal_HR_dftu_soc_sparse");
-
-    int total_R_num = this->LM->all_R_coor.size();
-    int *nonzero_num = new int[total_R_num];
-    ModuleBase::GlobalFunc::ZEROS(nonzero_num, total_R_num);
-    int count = 0;
-    for (auto &R_coor : this->LM->all_R_coor)
-    {
-        auto iter = this->LM->SR_soc_sparse.find(R_coor);
-        if (iter != this->LM->SR_soc_sparse.end())
-        {
-            for (auto &row_loop : iter->second)
-            {
-                nonzero_num[count] += row_loop.second.size();
-            }
-        }
-        count++;
-    }
-
-    Parallel_Reduce::reduce_all(nonzero_num, total_R_num);
-
-    std::complex<double> *HR_soc_tmp = new std::complex<double>[this->LM->ParaV->nloc];
-    std::complex<double> *SR_soc_tmp = new std::complex<double>[this->LM->ParaV->nloc];
-
-    int ir=0;
-    int ic=0;
-    int iic=0;
-
-    count = 0;
-    for (auto &R_coor : this->LM->all_R_coor)
-    {
-        if (nonzero_num[count] != 0)
-        {
-            ModuleBase::GlobalFunc::ZEROS(HR_soc_tmp, this->LM->ParaV->nloc);
-            ModuleBase::GlobalFunc::ZEROS(SR_soc_tmp, this->LM->ParaV->nloc);
-
-            auto iter = this->LM->SR_soc_sparse.find(R_coor);
-            if (iter != this->LM->SR_soc_sparse.end())
-            {
-                for (auto &row_loop : iter->second)
-                {
-                    ir = this->LM->ParaV->global2local_row(row_loop.first);
-                    for (auto &col_loop : row_loop.second)
-                    {
-                        ic = this->LM->ParaV->global2local_col(col_loop.first);
-                        if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
-                        {
-                            iic = ir + ic * this->LM->ParaV->nrow;
-                        }
-                        else
-                        {
-                            iic = ir * this->LM->ParaV->ncol + ic;
-                        }
-                        SR_soc_tmp[iic] = col_loop.second;
-                    }
-                }
-            }
-
-            GlobalC::dftu.cal_eff_pot_mat_R_complex_double(current_spin, SR_soc_tmp, HR_soc_tmp);
-
-            for (int i = 0; i < GlobalV::NLOCAL; ++i)
-            {
-                ir = this->LM->ParaV->global2local_row(i);
-                if (ir >= 0)
-                {
-                    for (int j = 0; j < GlobalV::NLOCAL; ++j)
-                    {
-                        ic = this->LM->ParaV->global2local_col(j);
-                        if (ic >= 0)
-                        {
-                            if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
-                            {
-                                iic = ir + ic * this->LM->ParaV->nrow;
-                            }
-                            else
-                            {
-                                iic = ir * this->LM->ParaV->ncol + ic;
-                            }
-
-                            if (std::abs(HR_soc_tmp[iic]) > sparse_threshold)
-                            {
-                                std::complex<double> &value = this->LM->HR_soc_sparse[R_coor][i][j];
-                                value += HR_soc_tmp[iic];
-                                if (std::abs(value) <= sparse_threshold)
-                                {
-                                    this->LM->HR_soc_sparse[R_coor][i].erase(j);
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-
-        }
-
-        count++;
-    }
-
-    delete[] nonzero_num;
-    delete[] HR_soc_tmp;
-    delete[] SR_soc_tmp;
-    nonzero_num = nullptr;
-    HR_soc_tmp = nullptr;
-    SR_soc_tmp = nullptr;
-
-    ModuleBase::timer::tick("LCAO_Hamilt","calculat_HR_dftu_soc_sparse");
-
-}
-
-
-// in case there are elements smaller than the threshold
-void LCAO_Hamilt::clear_zero_elements(const int &current_spin, const double &sparse_threshold)
-{
-    if(GlobalV::NSPIN != 4)
-    {
-        for (auto &R_loop : this->LM->HR_sparse[current_spin])
-        {
-            for (auto &row_loop : R_loop.second)
-            {
-                auto &col_map = row_loop.second;
-                auto iter = col_map.begin();
-                while (iter != col_map.end())
-                {
-                    if (std::abs(iter->second) <= sparse_threshold)
-                    {
-                        col_map.erase(iter++);
-                    }
-                    else
-                    {
-                        iter++;
-                    }
-                }
-            }
-        }
-
-        for (auto &R_loop : this->LM->SR_sparse)
-        {
-            for (auto &row_loop : R_loop.second)
-            {
-                auto &col_map = row_loop.second;
-                auto iter = col_map.begin();
-                while (iter != col_map.end())
-                {
-                    if (std::abs(iter->second) <= sparse_threshold)
-                    {
-                        col_map.erase(iter++);
-                    }
-                    else
-                    {
-                        iter++;
-                    }
-                }
-            }
-        }
-
-    }
-    else
-    {
-        for (auto &R_loop : this->LM->HR_soc_sparse)
-        {
-            for (auto &row_loop : R_loop.second)
-            {
-                auto &col_map = row_loop.second;
-                auto iter = col_map.begin();
-                while (iter != col_map.end())
-                {
-                    if (std::abs(iter->second) <= sparse_threshold)
-                    {
-                        col_map.erase(iter++);
-                    }
-                    else
-                    {
-                        iter++;
-                    }
-                }
-            }
-        }
-
-        for (auto &R_loop : this->LM->SR_soc_sparse)
-        {
-            for (auto &row_loop : R_loop.second)
-            {
-                auto &col_map = row_loop.second;
-                auto iter = col_map.begin();
-                while (iter != col_map.end())
-                {
-                    if (std::abs(iter->second) <= sparse_threshold)
-                    {
-                        col_map.erase(iter++);
-                    }
-                    else
-                    {
-                        iter++;
-                    }
-                }
-            }
-        }
-    }
-}
-
-
-void LCAO_Hamilt::destroy_all_HSR_sparse(void)
-{
-	this->LM->destroy_HS_R_sparse();
-}
-
-void LCAO_Hamilt::destroy_TR_sparse(void)
-{
-	this->LM->destroy_T_R_sparse();
-}
-
-void LCAO_Hamilt::destroy_dH_R_sparse(void)
-{
-	this->LM->destroy_dH_R_sparse();
-}
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h
deleted file mode 100644
index df6f5dfc16..0000000000
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#ifndef LCAO_HAMILT_H
-#define LCAO_HAMILT_H
-
-#include "module_base/global_function.h"
-#include "module_base/global_variable.h"
-#include "LCAO_gen_fixedH.h"
-#include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
-#include "module_hamilt_general/hamilt.h"
-#include "module_hamilt_lcao/module_gint/grid_technique.h"
-
-#include "module_hamilt_lcao/module_gint/gint_gamma.h"
-#include "module_hamilt_lcao/module_gint/gint_k.h"
-
-
-#ifdef __EXX
-#include <RI/global/Tensor.h>
-#endif
-
-class LCAO_Hamilt
-{
-    public:
-
-    LCAO_Hamilt();
-
-    ~LCAO_Hamilt();
-
-    void cal_HContainer_sparse_d(const int &current_spin, 
-        const double &sparse_threshold, 
-        const hamilt::HContainer<double>& hR, 
-        std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, double>>>& target);
-
-    void cal_HContainer_sparse_cd(const int &current_spin, 
-        const double &sparse_threshold, 
-        const hamilt::HContainer<std::complex<double>>& hR, 
-        std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, std::complex<double>>>>& target);
-
-
-    void cal_STN_R_sparse_for_T(const double &sparse_threshold);
-
-    void cal_HR_dftu_sparse(const int &current_spin, const double &sparse_threshold);
-
-    void cal_HR_dftu_soc_sparse(const int &current_spin, const double &sparse_threshold);
-
-#ifdef __EXX
-    template<typename Tdata> void cal_HR_exx_sparse(
-            const int &current_spin,
-            const double &sparse_threshold,
-            const int (&nmp)[3],
-            const std::vector< std::map <int, std::map < std::pair<int, std::array<int,3>>, RI::Tensor<Tdata> > >>& Hexxs);
-#endif
-
-	void cal_HSR_sparse(
-			const int &current_spin, 
-			const double &sparse_threshold, 
-			const int (&nmp)[3], 
-			hamilt::Hamilt<std::complex<double>>* p_ham);
-
-    void cal_SR_sparse(const double &sparse_threshold, hamilt::Hamilt<std::complex<double>>* p_ham);
-
-    void clear_zero_elements(const int &current_spin, const double &sparse_threshold);
-
-    void destroy_all_HSR_sparse(void);
-
-    void cal_TR_sparse(
-			LCAO_gen_fixedH &gen_h,
-			const double &sparse_threshold);
-
-    void destroy_TR_sparse(void);
-
-    void destroy_dH_R_sparse(void);
-
-    LCAO_Matrix* LM;
-};
-
-#endif
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.hpp
index 57ed48c7e7..ada6b6993e 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.hpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.hpp
@@ -6,11 +6,13 @@
 #ifndef LCAO_HAMILT_HPP
 #define LCAO_HAMILT_HPP
 
-#include "LCAO_hamilt.h"
 #include "module_base/global_variable.h"
 #include "module_base/abfs-vector3_order.h"
 #include "module_ri/RI_2D_Comm.h"
 #include "module_base/timer.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/spar_exx.h"
+// use LCAO_Matrix
+#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
 
 #include <RI/global/Global_Func-2.h>
 #include <RI/ri/Cell_Nearest.h>
@@ -21,16 +23,18 @@
 #include <string>
 #include <stdexcept>
 
+#ifdef __EXX
 // Peize Lin add 2022.09.13
 template<typename Tdata>
-void LCAO_Hamilt::cal_HR_exx_sparse(
+void sparse_format::cal_HR_exx(
+            LCAO_Matrix &lm, // mohan add 2024-04-06
 			const int &current_spin, 
 			const double &sparse_threshold,
 			const int (&nmp)[3],
 			const std::vector< std::map<int, std::map<std::pair<int,std::array<int,3>>, RI::Tensor<Tdata>>>>& Hexxs)
 {
-	ModuleBase::TITLE("LCAO_Hamilt","cal_HR_exx_sparse");
-	ModuleBase::timer::tick("LCAO_Hamilt","cal_HR_exx_sparse");
+	ModuleBase::TITLE("sparse_format","cal_HR_exx");
+	ModuleBase::timer::tick("sparse_format","cal_HR_exx");
 
 	const Tdata frac = GlobalC::exx_info.info_global.hybrid_alpha;
 
@@ -54,47 +58,70 @@ void LCAO_Hamilt::cal_HR_exx_sparse(
 
 	for(const int is : is_list)
     {
-		int is0_b, is1_b;
+		int is0_b=0;
+        int is1_b=0;
 		std::tie(is0_b,is1_b) = RI_2D_Comm::split_is_block(is);
-        if (Hexxs.empty())	break;
+
+        if (Hexxs.empty())	
+		{
+			break;
+		}
+
 		for(const auto &HexxA : Hexxs[is])
 		{
 			const int iat0 = HexxA.first;
 			for(const auto &HexxB : HexxA.second)
 			{
 				const int iat1 = HexxB.first.first;
+
 				const Abfs::Vector3_Order<int> R = RI_Util::array3_to_Vector3(
 					cell_nearest.get_cell_nearest_discrete(iat0, iat1, HexxB.first.second));
-				this->LM->all_R_coor.insert(R);
+
+				lm.all_R_coor.insert(R);
+
 				const RI::Tensor<Tdata> &Hexx = HexxB.second;
+
 				for(size_t iw0=0; iw0<Hexx.shape[0]; ++iw0)
 				{
 					const int iwt0 = RI_2D_Comm::get_iwt(iat0, iw0, is0_b);
-					const int iwt0_local = this->LM->ParaV->global2local_row(iwt0);		
-					if(iwt0_local<0)	continue;
+					const int iwt0_local = lm.ParaV->global2local_row(iwt0);		
+
+					if(iwt0_local<0)	
+					{
+						continue;
+					}
+
 					for(size_t iw1=0; iw1<Hexx.shape[1]; ++iw1)
 					{
 						const int iwt1 = RI_2D_Comm::get_iwt(iat1, iw1, is1_b);
-						const int iwt1_local = this->LM->ParaV->global2local_col(iwt1);		
-						if(iwt1_local<0)	continue;
+						const int iwt1_local = lm.ParaV->global2local_col(iwt1);		
+
+						if(iwt1_local<0)	
+						{
+							continue;
+						}
 
 						if(std::abs(Hexx(iw0,iw1)) > sparse_threshold)
 						{
 							if(GlobalV::NSPIN==1 || GlobalV::NSPIN==2)
 							{
-								auto &HR_sparse_ptr = this->LM->HR_sparse[current_spin][R][iwt0];
+								auto &HR_sparse_ptr = lm.HR_sparse[current_spin][R][iwt0];
 								double &HR_sparse = HR_sparse_ptr[iwt1];
 								HR_sparse += RI::Global_Func::convert<double>(frac * Hexx(iw0,iw1));
 								if(std::abs(HR_sparse) <= sparse_threshold)
+								{
 									HR_sparse_ptr.erase(iwt1);
+								}
 							}
 							else if(GlobalV::NSPIN==4)
 							{
-								auto &HR_sparse_ptr = this->LM->HR_soc_sparse[R][iwt0];
+								auto &HR_sparse_ptr = lm.HR_soc_sparse[R][iwt0];
 								std::complex<double> &HR_sparse = HR_sparse_ptr[iwt1];
 								HR_sparse += RI::Global_Func::convert<std::complex<double>>(frac * Hexx(iw0,iw1));
 								if(std::abs(HR_sparse) <= sparse_threshold)
+								{
 									HR_sparse_ptr.erase(iwt1);
+								}
 							}
 							else
 							{
@@ -107,7 +134,8 @@ void LCAO_Hamilt::cal_HR_exx_sparse(
 		}
 	}
 
-	ModuleBase::timer::tick("LCAO_Hamilt","cal_HR_exx_sparse");
+	ModuleBase::timer::tick("sparse_format","cal_HR_exx");
 }
+#endif
 
 #endif
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h
index cee9b60ee3..f0a027801f 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h
@@ -4,7 +4,6 @@
 #include "module_elecstate/potentials/potential_new.h"
 #include "module_hamilt_general/hamilt.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.h"
@@ -99,4 +98,4 @@ class HamiltLCAO : public Hamilt<TK>
 
 } // namespace hamilt
 
-#endif
\ No newline at end of file
+#endif
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h b/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h
index 50049ee970..ee47cb0b59 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h
@@ -9,10 +9,10 @@
 #include "module_hamilt_lcao/module_gint/grid_technique.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/record_adj.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h"
 #include "module_psi/psi.h"
 #include "module_elecstate/elecstate.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/DM_gamma_2d_to_grid.h"
+#include "module_base/abfs-vector3_order.h"
 
 
 // try to get rid of this class, so please do not use it 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/sparse_format.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
similarity index 75%
rename from source/module_hamilt_lcao/hamilt_lcaodft/sparse_format.cpp
rename to source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
index 640c57f055..7bc51fe4c3 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/sparse_format.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
@@ -1,15 +1,16 @@
-#include "sparse_format.h"
+#include "spar_dh.h"
 
 void sparse_format::cal_dH(
 		LCAO_Matrix &lm,
+		Grid_Driver &grid,
 		LCAO_gen_fixedH &gen_h, 
 		const int &current_spin, 
-		const double &sparse_threshold,
+		const double &sparse_thr,
 		Gint_k &gint_k)
 {
     ModuleBase::TITLE("sparse_format","cal_dH");
 
-    sparse_format::set_R_range(lm);
+    sparse_format::set_R_range(lm.all_R_coor, grid);
 
     const int nnr = lm.ParaV->nnr;
     lm.DHloc_fixedR_x = new double[nnr];
@@ -35,34 +36,38 @@ void sparse_format::cal_dH(
     }
     gen_h.build_Nonlocal_mu_new (lm.Hloc_fixed.data(), true);
     
-    sparse_format::cal_dSTN_R(lm, current_spin, sparse_threshold);
+    sparse_format::cal_dSTN_R(lm, grid, current_spin, sparse_thr);
 
     delete[] lm.DHloc_fixedR_x;
     delete[] lm.DHloc_fixedR_y;
     delete[] lm.DHloc_fixedR_z;
 
-    gint_k.cal_dvlocal_R_sparseMatrix(current_spin, sparse_threshold, &lm);
+    gint_k.cal_dvlocal_R_sparseMatrix(current_spin, sparse_thr, &lm);
+
+    return;
 }
 
 
-void sparse_format::set_R_range(LCAO_Matrix &lm)
+void sparse_format::set_R_range(
+        std::set<Abfs::Vector3_Order<int>> &all_R_coor,
+		Grid_Driver &grid)
 {
-    int R_minX = int(GlobalC::GridD.getD_minX());
-    int R_minY = int(GlobalC::GridD.getD_minY());
-    int R_minZ = int(GlobalC::GridD.getD_minZ());
+    const int RminX = int(grid.getD_minX());
+    const int RminY = int(grid.getD_minY());
+    const int RminZ = int(grid.getD_minZ());
 
-    int R_x = GlobalC::GridD.getCellX();
-    int R_y = GlobalC::GridD.getCellY();
-    int R_z = GlobalC::GridD.getCellZ();
+    const int Rx = grid.getCellX();
+    const int Ry = grid.getCellY();
+    const int Rz = grid.getCellZ();
 
-    for(int ix = 0; ix < R_x; ix++)
+    for(int ix = 0; ix < Rx; ix++)
     {
-        for(int iy = 0; iy < R_y; iy++)
+        for(int iy = 0; iy < Ry; iy++)
         {
-            for(int iz = 0; iz < R_z; iz++)
+            for(int iz = 0; iz < Rz; iz++)
             {
-                Abfs::Vector3_Order<int> temp_R(ix+R_minX, iy+R_minY, iz+R_minZ);
-                lm.all_R_coor.insert(temp_R);
+                Abfs::Vector3_Order<int> temp_R(ix+RminX, iy+RminY, iz+RminZ);
+                all_R_coor.insert(temp_R);
             }
         }
     }
@@ -73,8 +78,9 @@ void sparse_format::set_R_range(LCAO_Matrix &lm)
 
 void sparse_format::cal_dSTN_R(
 		LCAO_Matrix &lm,
+		Grid_Driver &grid,
 		const int &current_spin, 
-		const double &sparse_threshold)
+		const double &sparse_thr)
 {
     ModuleBase::TITLE("sparse_format","cal_dSTN_R");
 
@@ -91,31 +97,34 @@ void sparse_format::cal_dSTN_R(
         for(int I1 = 0; I1 < atom1->na; ++I1)
         {
             tau1 = atom1->tau[I1];
-            GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1);
+            grid.Find_atom(GlobalC::ucell, tau1, T1, I1);
             Atom* atom1 = &GlobalC::ucell.atoms[T1];
             const int start = GlobalC::ucell.itiaiw2iwt(T1,I1,0);
 
-            for(int ad = 0; ad < GlobalC::GridD.getAdjacentNum()+1; ++ad)
+            for(int ad = 0; ad < grid.getAdjacentNum()+1; ++ad)
             {
-                const int T2 = GlobalC::GridD.getType(ad);
-                const int I2 = GlobalC::GridD.getNatom(ad);
+                const int T2 = grid.getType(ad);
+                const int I2 = grid.getNatom(ad);
                 Atom* atom2 = &GlobalC::ucell.atoms[T2];
 
-                tau2 = GlobalC::GridD.getAdjacentTau(ad);
+                tau2 = grid.getAdjacentTau(ad);
                 dtau = tau2 - tau1;
                 double distance = dtau.norm() * GlobalC::ucell.lat0;
                 double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
 
                 bool adj = false;
 
-                if(distance < rcut) adj = true;
+				if(distance < rcut) 
+				{
+					adj = true;
+				}
                 else if(distance >= rcut)
                 {
-                    for(int ad0 = 0; ad0 < GlobalC::GridD.getAdjacentNum()+1; ++ad0)
+                    for(int ad0 = 0; ad0 < grid.getAdjacentNum()+1; ++ad0)
                     {
-                        const int T0 = GlobalC::GridD.getType(ad0);
+                        const int T0 = grid.getType(ad0);
 
-                        tau0 = GlobalC::GridD.getAdjacentTau(ad0);
+                        tau0 = grid.getAdjacentTau(ad0);
                         dtau1 = tau0 - tau1;
                         dtau2 = tau0 - tau2;
 
@@ -138,9 +147,9 @@ void sparse_format::cal_dSTN_R(
                     const int start2 = GlobalC::ucell.itiaiw2iwt(T2,I2,0);
 
 					Abfs::Vector3_Order<int> dR(
-							GlobalC::GridD.getBox(ad).x, 
-							GlobalC::GridD.getBox(ad).y, 
-							GlobalC::GridD.getBox(ad).z);
+							grid.getBox(ad).x, 
+							grid.getBox(ad).y, 
+							grid.getBox(ad).z);
 
                     for(int ii=0; ii<atom1->nw*GlobalV::NPOL; ii++)
                     {
@@ -165,17 +174,17 @@ void sparse_format::cal_dSTN_R(
                             if(GlobalV::NSPIN!=4)
                             {
                                 temp_value_double = lm.DHloc_fixedR_x[index];
-                                if (std::abs(temp_value_double) > sparse_threshold)
+                                if (std::abs(temp_value_double) > sparse_thr)
                                 {
                                     lm.dHRx_sparse[current_spin][dR][iw1_all][iw2_all] = temp_value_double;
                                 }
                                 temp_value_double = lm.DHloc_fixedR_y[index];
-                                if (std::abs(temp_value_double) > sparse_threshold)
+                                if (std::abs(temp_value_double) > sparse_thr)
                                 {
                                     lm.dHRy_sparse[current_spin][dR][iw1_all][iw2_all] = temp_value_double;
                                 }
                                 temp_value_double = lm.DHloc_fixedR_z[index];
-                                if (std::abs(temp_value_double) > sparse_threshold)
+                                if (std::abs(temp_value_double) > sparse_thr)
                                 {
                                     lm.dHRz_sparse[current_spin][dR][iw1_all][iw2_all] = temp_value_double;
                                 }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.h b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.h
new file mode 100644
index 0000000000..84c8199038
--- /dev/null
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.h
@@ -0,0 +1,32 @@
+#ifndef SPARSE_FORMAT_DH_H 
+#define SPARSE_FORMAT_DH_H
+
+#include "module_cell/module_neighbor/sltk_atom_arrange.h"
+#include "module_cell/module_neighbor/sltk_grid_driver.h"
+#include "module_hamilt_pw/hamilt_pwdft/global.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
+
+namespace sparse_format
+{
+	void cal_dH(
+			LCAO_Matrix &lm,
+			Grid_Driver &grid,
+			LCAO_gen_fixedH &gen_h, 
+			const int &current_spin, 
+			const double &sparse_thr,
+			Gint_k &gint_k);
+
+	// be called by 'cal_dH_sparse'
+	void set_R_range(
+			std::set<Abfs::Vector3_Order<int>> &all_R_coor,
+			Grid_Driver &grid);
+
+	// be called by 'cal_dH_sparse' 
+	void cal_dSTN_R(
+			LCAO_Matrix &lm,
+		    Grid_Driver &grid,
+			const int &current_spin, 
+			const double &sparse_thr);
+}
+
+#endif
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_exx.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_exx.cpp
new file mode 100644
index 0000000000..6538da7072
--- /dev/null
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_exx.cpp
@@ -0,0 +1,4 @@
+#ifdef __EXX
+#include "LCAO_hamilt.hpp"
+#endif
+
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_exx.h b/source/module_hamilt_lcao/hamilt_lcaodft/spar_exx.h
new file mode 100644
index 0000000000..fb7ba0bcaf
--- /dev/null
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_exx.h
@@ -0,0 +1,29 @@
+#ifndef SPARSE_FORMAT_EXX_H 
+#define SPARSE_FORMAT_EXX_H
+
+#ifdef __EXX
+
+#include <vector>
+#include <map>
+#include <array>
+
+#include <RI/global/Global_Func-2.h>
+#include <RI/ri/Cell_Nearest.h>
+
+// use LCAO_Matrix
+#include "module_hamilt_lcao/hamilt_lcaodft/spar_exx.h"
+
+namespace sparse_format
+{
+
+    template<typename Tdata> void cal_HR_exx(
+            LCAO_Matrix &lm,
+            const int &current_spin,
+            const double &sparse_thr,
+            const int (&nmp)[3],
+            const std::vector< std::map <int, std::map < std::pair<int, std::array<int,3>>, RI::Tensor<Tdata> > >>& Hexxs);
+
+}
+#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.hpp"
+#endif
+#endif
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.cpp
new file mode 100644
index 0000000000..5b018e9731
--- /dev/null
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.cpp
@@ -0,0 +1,301 @@
+#include "spar_hsr.h"
+#include "spar_dh.h"
+#include "spar_u.h"
+#include "spar_exx.h"
+#include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
+
+void sparse_format::cal_HSR(
+        const Parallel_Orbitals &pv,
+		LCAO_Matrix &lm,
+		Grid_Driver &grid,
+		const int &current_spin, 
+		const double &sparse_thr, 
+		const int (&nmp)[3], 
+		hamilt::Hamilt<std::complex<double>>* p_ham)
+{
+    ModuleBase::TITLE("sparse_format","cal_HSR");
+
+    sparse_format::set_R_range(lm.all_R_coor, grid);
+
+    const int nspin = GlobalV::NSPIN;
+
+    //cal_STN_R_sparse(current_spin, sparse_thr);
+    if(nspin==1 || nspin==2)
+    {
+        hamilt::HamiltLCAO<std::complex<double>, double>* p_ham_lcao = 
+        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham);
+
+		sparse_format::cal_HContainer_d(
+                pv,
+                current_spin, 
+				sparse_thr, 
+				*(p_ham_lcao->getHR()), 
+				lm.HR_sparse[current_spin]);
+
+		sparse_format::cal_HContainer_d(
+				pv,
+				current_spin, 
+				sparse_thr, 
+				*(p_ham_lcao->getSR()), 
+				lm.SR_sparse);
+    }
+	else if(nspin==4)
+	{
+		hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>* p_ham_lcao = 
+			dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham);
+
+		sparse_format::cal_HContainer_cd(
+				pv,
+				current_spin, 
+				sparse_thr, 
+				*(p_ham_lcao->getHR()), 
+				lm.HR_soc_sparse);
+
+		sparse_format::cal_HContainer_cd(
+				pv,
+				current_spin, 
+				sparse_thr, 
+				*(p_ham_lcao->getSR()), 
+				lm.SR_soc_sparse);
+	}
+	else
+	{
+        ModuleBase::WARNING_QUIT("cal_HSR","check the value of nspin.");       
+    }
+
+    // only old DFT+U method need to cal extra contribution to HR
+    if (GlobalV::dft_plus_u == 2)
+    {
+        if(nspin==1 || nspin==2)
+        {
+			cal_HR_dftu(
+					pv,
+					lm.all_R_coor,
+					lm.SR_sparse,
+					lm.HR_sparse,
+					current_spin, 
+					sparse_thr);
+		}
+		else if(nspin==4)
+		{
+			cal_HR_dftu_soc(
+					pv,
+					lm.all_R_coor,
+					lm.SR_soc_sparse,
+					lm.HR_soc_sparse,
+					current_spin, 
+					sparse_thr);
+        }
+		else
+		{
+			ModuleBase::WARNING_QUIT("cal_HSR","check the value of nspin.");       
+		}
+	}
+
+#ifdef __EXX
+#ifdef __MPI
+    // if EXX is considered
+    if( GlobalC::exx_info.info_global.cal_exx )
+    {
+		if(GlobalC::exx_info.info_ri.real_number)
+		{
+			sparse_format::cal_HR_exx(lm, current_spin, sparse_thr, nmp, *lm.Hexxd);
+		}
+		else
+		{
+			sparse_format::cal_HR_exx(lm, current_spin, sparse_thr, nmp, *lm.Hexxc);
+		}
+	}
+#endif // __MPI
+#endif // __EXX
+
+    sparse_format::clear_zero_elements(lm, current_spin, sparse_thr);
+
+    return;
+}
+
+
+void sparse_format::cal_HContainer_d(
+        const Parallel_Orbitals &pv,
+		const int &current_spin, 
+		const double &sparse_thr, 
+		const hamilt::HContainer<double>& hR, 
+		std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, double>>>& target)
+{
+    ModuleBase::TITLE("sparse_format","cal_HContainer_d");
+
+    auto row_indexes = pv.get_indexes_row();
+    auto col_indexes = pv.get_indexes_col();
+    for(int iap=0;iap<hR.size_atom_pairs();++iap)
+    {
+        int atom_i = hR.get_atom_pair(iap).get_atom_i();
+        int atom_j = hR.get_atom_pair(iap).get_atom_j();
+        int start_i = pv.atom_begin_row[atom_i];
+        int start_j = pv.atom_begin_col[atom_j];
+        int row_size = pv.get_row_size(atom_i);
+        int col_size = pv.get_col_size(atom_j);
+        for(int iR=0;iR<hR.get_atom_pair(iap).get_R_size();++iR)
+        {
+            auto& matrix = hR.get_atom_pair(iap).get_HR_values(iR);
+            int* r_index = hR.get_atom_pair(iap).get_R_index(iR);
+            Abfs::Vector3_Order<int> dR(r_index[0], r_index[1], r_index[2]);
+            for(int i=0;i<row_size;++i)
+            {
+                int mu = row_indexes[start_i+i];
+                for(int j=0;j<col_size;++j)
+                {
+                    int nu = col_indexes[start_j+j];
+                    const auto& value_tmp = matrix.get_value(i,j);
+                    if(std::abs(value_tmp)>sparse_thr)
+                    {
+                        target[dR][mu][nu] = value_tmp;
+                    }
+                }
+            }
+        }
+    }
+
+    return;
+}
+
+void sparse_format::cal_HContainer_cd(
+        const Parallel_Orbitals &pv,
+		const int &current_spin, 
+		const double &sparse_thr, 
+		const hamilt::HContainer<std::complex<double>>& hR, 
+		std::map<Abfs::Vector3_Order<int>, 
+		std::map<size_t, std::map<size_t, std::complex<double>>>>& target)
+{
+    ModuleBase::TITLE("sparse_format","cal_HContainer_cd");
+
+    auto row_indexes = pv.get_indexes_row();
+    auto col_indexes = pv.get_indexes_col();
+    for(int iap=0;iap<hR.size_atom_pairs();++iap)
+    {
+        int atom_i = hR.get_atom_pair(iap).get_atom_i();
+        int atom_j = hR.get_atom_pair(iap).get_atom_j();
+        int start_i = pv.atom_begin_row[atom_i];
+        int start_j = pv.atom_begin_col[atom_j];
+        int row_size = pv.get_row_size(atom_i);
+        int col_size = pv.get_col_size(atom_j);
+        for(int iR=0;iR<hR.get_atom_pair(iap).get_R_size();++iR)
+        {
+            auto& matrix = hR.get_atom_pair(iap).get_HR_values(iR);
+            int* r_index = hR.get_atom_pair(iap).get_R_index(iR);
+            Abfs::Vector3_Order<int> dR(r_index[0], r_index[1], r_index[2]);
+            for(int i=0;i<row_size;++i)
+            {
+                int mu = row_indexes[start_i+i];
+                for(int j=0;j<col_size;++j)
+                {
+                    int nu = col_indexes[start_j+j];
+                    const auto& value_tmp = matrix.get_value(i,j);
+                    if(std::abs(value_tmp)>sparse_thr)
+                    {
+                        target[dR][mu][nu] = value_tmp;
+                    }
+                }
+            }
+        }
+    }
+
+    return;
+}
+
+
+// in case there are elements smaller than the threshold
+void sparse_format::clear_zero_elements(
+        LCAO_Matrix &lm,
+		const int &current_spin, 
+		const double &sparse_thr)
+{
+    ModuleBase::TITLE("sparse_format","clear_zero_elements");
+
+    if(GlobalV::NSPIN != 4)
+    {
+        for (auto &R_loop : lm.HR_sparse[current_spin])
+        {
+            for (auto &row_loop : R_loop.second)
+            {
+                auto &col_map = row_loop.second;
+                auto iter = col_map.begin();
+                while (iter != col_map.end())
+                {
+                    if (std::abs(iter->second) <= sparse_thr)
+                    {
+                        col_map.erase(iter++);
+                    }
+                    else
+                    {
+                        iter++;
+                    }
+                }
+            }
+        }
+
+        for (auto &R_loop : lm.SR_sparse)
+        {
+            for (auto &row_loop : R_loop.second)
+            {
+                auto &col_map = row_loop.second;
+                auto iter = col_map.begin();
+                while (iter != col_map.end())
+                {
+                    if (std::abs(iter->second) <= sparse_thr)
+                    {
+                        col_map.erase(iter++);
+                    }
+                    else
+                    {
+                        iter++;
+                    }
+                }
+            }
+        }
+
+    }
+    else
+    {
+        for (auto &R_loop : lm.HR_soc_sparse)
+        {
+            for (auto &row_loop : R_loop.second)
+            {
+                auto &col_map = row_loop.second;
+                auto iter = col_map.begin();
+                while (iter != col_map.end())
+                {
+                    if (std::abs(iter->second) <= sparse_thr)
+                    {
+                        col_map.erase(iter++);
+                    }
+                    else
+                    {
+                        iter++;
+                    }
+                }// end while iter
+            }// end row loop
+        }// end R loop
+
+        for (auto &R_loop : lm.SR_soc_sparse)
+        {
+            for (auto &row_loop : R_loop.second)
+            {
+                auto &col_map = row_loop.second;
+                auto iter = col_map.begin();
+                while (iter != col_map.end())
+                {
+                    if (std::abs(iter->second) <= sparse_thr)
+                    {
+                        col_map.erase(iter++);
+                    }
+                    else
+                    {
+                        iter++;
+                    }
+                }// end while iter
+            }// end row_loop
+        }// end R_loop
+    }
+
+    return;
+}
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.h b/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.h
new file mode 100644
index 0000000000..908dfb1909
--- /dev/null
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_hsr.h
@@ -0,0 +1,41 @@
+#ifndef SPARSE_FORMAT_HSR_H 
+#define SPARSE_FORMAT_HSR_H
+
+#include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
+
+namespace sparse_format
+{
+
+	void cal_HSR(
+            const Parallel_Orbitals &pv,
+			LCAO_Matrix &lm,
+			Grid_Driver &grid,
+			const int &current_spin, 
+			const double &sparse_thr, 
+			const int (&nmp)[3], 
+			hamilt::Hamilt<std::complex<double>>* p_ham);
+
+	void cal_HContainer_d(
+            const Parallel_Orbitals &pv,
+			const int &current_spin, 
+			const double &sparse_threshold, 
+			const hamilt::HContainer<double>& hR, 
+			std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, double>>>& target);
+
+	void cal_HContainer_cd(
+            const Parallel_Orbitals &pv,
+			const int &current_spin, 
+			const double &sparse_threshold, 
+			const hamilt::HContainer<std::complex<double>>& hR, 
+			std::map<Abfs::Vector3_Order<int>, 
+			std::map<size_t, std::map<size_t, std::complex<double>>>>& target);
+
+	void clear_zero_elements(
+			LCAO_Matrix &lm,
+			const int &current_spin, 
+			const double &sparse_thr);
+
+}
+
+#endif
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.cpp
new file mode 100644
index 0000000000..f8ee057b97
--- /dev/null
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.cpp
@@ -0,0 +1,195 @@
+#include <complex>
+#include "spar_st.h"
+#include "spar_dh.h"
+#include "spar_hsr.h"
+
+void sparse_format::cal_SR(
+        const Parallel_Orbitals &pv,
+		std::set<Abfs::Vector3_Order<int>> &all_R_coor,
+        std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, double>>> &SR_sparse,
+        std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, std::complex<double>>>> &SR_soc_sparse,
+		Grid_Driver &grid,
+		const double &sparse_thr, 
+		hamilt::Hamilt<std::complex<double>>* p_ham)
+{
+    ModuleBase::TITLE("sparse_format","cal_SR");
+
+    sparse_format::set_R_range(all_R_coor, grid);
+
+    const int nspin = GlobalV::NSPIN;
+
+    //cal_STN_R_sparse(current_spin, sparse_thr);
+    if(nspin==1 || nspin==2)
+    {
+        hamilt::HamiltLCAO<std::complex<double>, double>* p_ham_lcao 
+        = dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham);
+        const int cspin = 0;
+        sparse_format::cal_HContainer_d(pv, cspin, sparse_thr, *(p_ham_lcao->getSR()), SR_sparse);
+    }
+    else if(nspin==4)
+    {
+        hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>* p_ham_lcao 
+        = dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham);
+        const int cspin = 0;
+        sparse_format::cal_HContainer_cd(pv, cspin, sparse_thr, *(p_ham_lcao->getSR()), SR_soc_sparse);
+    }
+
+    return;
+}
+
+
+void sparse_format::cal_TR(
+        const UnitCell &ucell,
+        const Parallel_Orbitals &pv,
+        LCAO_Matrix &lm,
+	    Grid_Driver &grid,
+		LCAO_gen_fixedH &gen_h,
+		const double &sparse_thr)
+{
+    ModuleBase::TITLE("sparse_format","cal_TR");
+    
+    //need to rebuild T(R)
+    lm.Hloc_fixedR.resize(lm.ParaV->nnr);
+    lm.zeros_HSR('T');
+
+    gen_h.build_ST_new('T', 0, ucell, lm.Hloc_fixedR.data());
+
+    sparse_format::set_R_range(lm.all_R_coor, grid);
+
+	sparse_format::cal_STN_R_for_T(
+			ucell, 
+			pv, 
+			lm, 
+			grid, 
+			sparse_thr);
+
+    return;
+}
+
+
+void sparse_format::cal_STN_R_for_T(
+        const UnitCell &ucell,
+        const Parallel_Orbitals &pv,
+        LCAO_Matrix &lm,
+		Grid_Driver &grid,
+		const double &sparse_thr)
+{
+    ModuleBase::TITLE("sparse_format","cal_STN_R_for_T");
+
+    const int nspin = GlobalV::NSPIN;
+
+    int index = 0;
+    ModuleBase::Vector3<double> dtau, tau1, tau2;
+    ModuleBase::Vector3<double> dtau1, dtau2, tau0;
+
+    double tmp=0.0;
+    std::complex<double> tmpc=std::complex<double>(0.0,0.0);
+
+    for(int T1 = 0; T1 < ucell.ntype; ++T1)
+    {
+        Atom* atom1 = &ucell.atoms[T1];
+        for(int I1 = 0; I1 < atom1->na; ++I1)
+        {
+            tau1 = atom1->tau[I1];
+            grid.Find_atom(ucell, tau1, T1, I1);
+            Atom* atom1 = &ucell.atoms[T1];
+            const int start = ucell.itiaiw2iwt(T1,I1,0);
+
+            for(int ad = 0; ad < grid.getAdjacentNum()+1; ++ad)
+            {
+                const int T2 = grid.getType(ad);
+                const int I2 = grid.getNatom(ad);
+                Atom* atom2 = &ucell.atoms[T2];
+
+                tau2 = grid.getAdjacentTau(ad);
+                dtau = tau2 - tau1;
+                double distance = dtau.norm() * ucell.lat0;
+                double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
+
+                bool adj = false;
+
+                if(distance < rcut) 
+				{
+					adj = true;
+				}
+
+                else if(distance >= rcut)
+                {
+                    for(int ad0 = 0; ad0 < grid.getAdjacentNum()+1; ++ad0)
+                    {
+                        const int T0 = grid.getType(ad0);
+
+                        tau0 = grid.getAdjacentTau(ad0);
+                        dtau1 = tau0 - tau1;
+                        dtau2 = tau0 - tau2;
+
+                        double distance1 = dtau1.norm() * ucell.lat0;
+                        double distance2 = dtau2.norm() * ucell.lat0;
+
+                        double rcut1 = GlobalC::ORB.Phi[T1].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max();
+                        double rcut2 = GlobalC::ORB.Phi[T2].getRcut() + ucell.infoNL.Beta[T0].get_rcut_max();
+
+                        if( distance1 < rcut1 && distance2 < rcut2 )
+                        {
+                            adj = true;
+                            break;
+                        }
+                    }
+                }
+
+                if(adj)
+                {
+                    const int start2 = ucell.itiaiw2iwt(T2,I2,0);
+
+					Abfs::Vector3_Order<int> dR(
+							grid.getBox(ad).x, 
+							grid.getBox(ad).y, 
+							grid.getBox(ad).z);
+
+                    for(int ii=0; ii<atom1->nw*GlobalV::NPOL; ii++)
+                    {
+                        const int iw1_all = start + ii;
+                        const int mu = pv.global2local_row(iw1_all);
+
+						if(mu<0)
+						{
+							continue;
+						}
+
+                        for(int jj=0; jj<atom2->nw*GlobalV::NPOL; jj++)
+                        {
+                            int iw2_all = start2 + jj;
+                            const int nu = pv.global2local_col(iw2_all);
+
+							if(nu<0)
+							{
+								continue;
+							}
+
+                            if(nspin==1 || nspin==2)
+                            {
+                                tmp = lm.Hloc_fixedR[index];
+                                if (std::abs(tmp) > sparse_thr)
+                                {
+                                    lm.TR_sparse[dR][iw1_all][iw2_all] = tmp;
+                                }
+                            }
+                            else if(nspin==4)
+                            {
+                                tmpc = lm.Hloc_fixedR_soc[index];
+                                if(std::abs(tmpc) > sparse_thr)
+                                {
+                                    lm.TR_soc_sparse[dR][iw1_all][iw2_all] = tmpc;
+                                }
+                            }
+
+                            ++index;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    return;
+}
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.h b/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.h
new file mode 100644
index 0000000000..8d6f24d2a7
--- /dev/null
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_st.h
@@ -0,0 +1,37 @@
+#ifndef SPARSE_FORMAT_ST_H 
+#define SPARSE_FORMAT_ST_H
+
+#include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
+
+namespace sparse_format
+{
+    //! calculate overlap matrix with lattice vector R
+	void cal_SR(
+            const Parallel_Orbitals &pv,
+			std::set<Abfs::Vector3_Order<int>> &all_R_coor,
+			std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, double>>> &SR_sparse,
+			std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, std::complex<double>>>> &SR_soc_sparse,
+			Grid_Driver &grid,
+			const double &sparse_thr, 
+			hamilt::Hamilt<std::complex<double>>* p_ham);
+
+    //! calculate kinetic matrix with lattice vector R
+	void cal_TR(
+			const UnitCell &ucell,
+			const Parallel_Orbitals &pv,
+			LCAO_Matrix &lm,
+			Grid_Driver &grid,
+			LCAO_gen_fixedH &gen_h,
+			const double &sparse_thr);
+
+    //! cal_STN_R_for_T is only called by cal_TR
+	void cal_STN_R_for_T(
+			const UnitCell &ucell,
+			const Parallel_Orbitals &pv,
+			LCAO_Matrix &lm,
+			Grid_Driver &grid,
+			const double &sparse_thr);
+}
+
+#endif
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_u.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_u.cpp
new file mode 100644
index 0000000000..fe0c903e1f
--- /dev/null
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_u.cpp
@@ -0,0 +1,246 @@
+#include "spar_u.h"
+#include "module_base/parallel_reduce.h"
+#include "module_hamilt_pw/hamilt_pwdft/global.h"
+#include "module_base/timer.h"
+#include "module_hamilt_lcao/module_dftu/dftu.h"
+
+void sparse_format::cal_HR_dftu(
+	    const Parallel_Orbitals &pv,
+        std::set<Abfs::Vector3_Order<int>> &all_R_coor,
+        std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, double>>> &SR_sparse,
+        std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, double>>> *HR_sparse,
+		const int &current_spin, 
+		const double &sparse_thr)
+{
+    ModuleBase::TITLE("sparse_format","cal_HR_dftu");
+    ModuleBase::timer::tick("sparse_format","cal_HR_dftu");
+
+    int total_R_num = all_R_coor.size();
+    int *nonzero_num = new int[total_R_num]();
+    ModuleBase::GlobalFunc::ZEROS(nonzero_num, total_R_num);
+
+    int count = 0;
+    for (auto &R_coor : all_R_coor)
+    {
+        auto iter = SR_sparse.find(R_coor);
+        if (iter != SR_sparse.end())
+        {
+            for (auto &row_loop : iter->second)
+            {
+                nonzero_num[count] += row_loop.second.size();
+            }
+        }
+        count++;
+    }
+
+    Parallel_Reduce::reduce_all(nonzero_num, total_R_num);
+
+    double *HR_tmp = new double[pv.nloc];
+    double *SR_tmp = new double[pv.nloc];
+
+    int ir=0;
+    int ic=0;
+    int iic=0;
+    auto &temp_HR_sparse = HR_sparse[current_spin];
+
+    count = 0;
+    for (auto &R_coor : all_R_coor)
+    {
+        if (nonzero_num[count] != 0)
+        {
+            ModuleBase::GlobalFunc::ZEROS(HR_tmp, pv.nloc);
+            ModuleBase::GlobalFunc::ZEROS(SR_tmp, pv.nloc);
+
+            auto iter = SR_sparse.find(R_coor);
+            if (iter != SR_sparse.end())
+            {
+                for (auto &row_loop : iter->second)
+                {
+                    ir = pv.global2local_row(row_loop.first);
+                    for (auto &col_loop : row_loop.second)
+                    {
+                        ic = pv.global2local_col(col_loop.first);
+                        if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
+                        {
+                            iic = ir + ic * pv.nrow;
+                        }
+                        else
+                        {
+                            iic = ir * pv.ncol + ic;
+                        }
+                        SR_tmp[iic] = col_loop.second;
+                    }
+                }
+            }
+
+            GlobalC::dftu.cal_eff_pot_mat_R_double(current_spin, SR_tmp, HR_tmp);
+
+            for (int i = 0; i < GlobalV::NLOCAL; ++i)
+            {
+                ir = pv.global2local_row(i);
+                if (ir >= 0)
+                {
+                    for (int j = 0; j < GlobalV::NLOCAL; ++j)
+                    {
+                        ic = pv.global2local_col(j);
+                        if (ic >= 0)
+                        {
+                            if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
+                            {
+                                iic = ir + ic * pv.nrow;
+                            }
+                            else
+                            {
+                                iic = ir * pv.ncol + ic;
+                            }
+
+                            if (std::abs(HR_tmp[iic]) > sparse_thr)
+                            {
+                                double &value = temp_HR_sparse[R_coor][i][j];
+                                value += HR_tmp[iic];
+                                if (std::abs(value) <= sparse_thr)
+                                {
+                                    temp_HR_sparse[R_coor][i].erase(j);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+        }
+
+        count++;
+    }
+
+    delete[] nonzero_num;
+    delete[] HR_tmp;
+    delete[] SR_tmp;
+    nonzero_num = nullptr;
+    HR_tmp = nullptr;
+    SR_tmp = nullptr;
+
+    ModuleBase::timer::tick("sparse_format","cal_HR_dftu_sparse");
+
+    return;
+}
+
+
+void sparse_format::cal_HR_dftu_soc(
+	    const Parallel_Orbitals &pv,
+        std::set<Abfs::Vector3_Order<int>> &all_R_coor,
+        std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, std::complex<double>>>> &SR_soc_sparse,
+        std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, std::complex<double>>>> &HR_soc_sparse,
+		const int &current_spin, 
+		const double &sparse_thr)
+{
+    ModuleBase::TITLE("sparse_format","cal_HR_dftu_soc");
+    ModuleBase::timer::tick("sparse_format","cal_HR_dftu_soc");
+
+    int total_R_num = all_R_coor.size();
+    int *nonzero_num = new int[total_R_num]();
+    ModuleBase::GlobalFunc::ZEROS(nonzero_num, total_R_num);
+    int count = 0;
+    for (auto &R_coor : all_R_coor)
+    {
+        auto iter = SR_soc_sparse.find(R_coor);
+        if (iter != SR_soc_sparse.end())
+        {
+            for (auto &row_loop : iter->second)
+            {
+                nonzero_num[count] += row_loop.second.size();
+            }
+        }
+        count++;
+    }
+
+    Parallel_Reduce::reduce_all(nonzero_num, total_R_num);
+
+    std::complex<double> *HR_soc_tmp = new std::complex<double>[pv.nloc];
+    std::complex<double> *SR_soc_tmp = new std::complex<double>[pv.nloc];
+
+    int ir=0;
+    int ic=0;
+    int iic=0;
+
+    count = 0;
+    for (auto &R_coor : all_R_coor)
+    {
+        if (nonzero_num[count] != 0)
+        {
+            ModuleBase::GlobalFunc::ZEROS(HR_soc_tmp, pv.nloc);
+            ModuleBase::GlobalFunc::ZEROS(SR_soc_tmp, pv.nloc);
+
+            auto iter = SR_soc_sparse.find(R_coor);
+            if (iter != SR_soc_sparse.end())
+            {
+                for (auto &row_loop : iter->second)
+                {
+                    ir = pv.global2local_row(row_loop.first);
+                    for (auto &col_loop : row_loop.second)
+                    {
+                        ic = pv.global2local_col(col_loop.first);
+                        if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
+                        {
+                            iic = ir + ic * pv.nrow;
+                        }
+                        else
+                        {
+                            iic = ir * pv.ncol + ic;
+                        }
+                        SR_soc_tmp[iic] = col_loop.second;
+                    }
+                }
+            }
+
+            GlobalC::dftu.cal_eff_pot_mat_R_complex_double(current_spin, SR_soc_tmp, HR_soc_tmp);
+
+            for (int i = 0; i < GlobalV::NLOCAL; ++i)
+            {
+                ir = pv.global2local_row(i);
+                if (ir >= 0)
+                {
+                    for (int j = 0; j < GlobalV::NLOCAL; ++j)
+                    {
+                        ic = pv.global2local_col(j);
+                        if (ic >= 0)
+                        {
+                            if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
+                            {
+                                iic = ir + ic * pv.nrow;
+                            }
+                            else
+                            {
+                                iic = ir * pv.ncol + ic;
+                            }
+
+                            if (std::abs(HR_soc_tmp[iic]) > sparse_thr)
+                            {
+                                std::complex<double> &value = HR_soc_sparse[R_coor][i][j];
+                                value += HR_soc_tmp[iic];
+                                if (std::abs(value) <= sparse_thr)
+                                {
+                                    HR_soc_sparse[R_coor][i].erase(j);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+        }
+
+        count++;
+    }
+
+    delete[] nonzero_num;
+    delete[] HR_soc_tmp;
+    delete[] SR_soc_tmp;
+    nonzero_num = nullptr;
+    HR_soc_tmp = nullptr;
+    SR_soc_tmp = nullptr;
+
+    ModuleBase::timer::tick("sparse_format","calculat_HR_dftu_soc");
+
+    return;
+}
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_u.h b/source/module_hamilt_lcao/hamilt_lcaodft/spar_u.h
new file mode 100644
index 0000000000..e26d808b5a
--- /dev/null
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_u.h
@@ -0,0 +1,31 @@
+#ifndef SPARSE_FORMAT_U_H 
+#define SPARSE_FORMAT_U_H
+
+#include "module_cell/module_neighbor/sltk_atom_arrange.h"
+#include "module_cell/module_neighbor/sltk_grid_driver.h"
+#include "module_hamilt_pw/hamilt_pwdft/global.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
+
+
+namespace sparse_format
+{
+
+	void cal_HR_dftu(
+		const Parallel_Orbitals &pv,
+		std::set<Abfs::Vector3_Order<int>> &all_R_coor,
+		std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, double>>> &SR_sparse,
+		std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, double>>> *HR_sparse,
+		const int &current_spin, 
+		const double &sparse_thr);
+
+	void cal_HR_dftu_soc(
+		const Parallel_Orbitals &pv,
+		std::set<Abfs::Vector3_Order<int>> &all_R_coor,
+        std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, std::complex<double>>>> &SR_soc_sparse,
+        std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, std::complex<double>>>> &HR_soc_sparse,
+		const int &current_spin, 
+		const double &sparse_thr);
+
+}
+
+#endif
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/sparse_format.h b/source/module_hamilt_lcao/hamilt_lcaodft/sparse_format.h
deleted file mode 100644
index 1d7a09af78..0000000000
--- a/source/module_hamilt_lcao/hamilt_lcaodft/sparse_format.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef SPARSE_FORMAT_H 
-#define SPARSE_FORMAT_H
-
-#include "module_cell/module_neighbor/sltk_atom_arrange.h"
-#include "module_cell/module_neighbor/sltk_grid_driver.h"
-#include "module_hamilt_pw/hamilt_pwdft/global.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
-
-
-namespace sparse_format
-{
-
-void cal_dH(
-		LCAO_Matrix &lm,
-		LCAO_gen_fixedH &gen_h, 
-		const int &current_spin, 
-		const double &sparse_threshold,
-		Gint_k &gint_k);
-
-// be called by 'cal_dH_sparse'
-void set_R_range(LCAO_Matrix &lm);
-
-// be called by 'cal_dH_sparse' 
-void cal_dSTN_R(
-		LCAO_Matrix &lm,
-		const int &current_spin, 
-		const double &sparse_threshold);
-
-
-}
-
-#endif
diff --git a/source/module_hamilt_lcao/module_tddft/evolve_elec.h b/source/module_hamilt_lcao/module_tddft/evolve_elec.h
index 4cb535d6d0..5448903a69 100644
--- a/source/module_hamilt_lcao/module_tddft/evolve_elec.h
+++ b/source/module_hamilt_lcao/module_tddft/evolve_elec.h
@@ -5,7 +5,6 @@
 #include "module_base/global_variable.h"
 #include "module_esolver/esolver_ks_lcao.h"
 #include "module_esolver/esolver_ks_lcao_tddft.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
 #include "module_psi/psi.h"
 
diff --git a/source/module_hsolver/diago_bpcg.cpp b/source/module_hsolver/diago_bpcg.cpp
index ec6457b7ee..a1efc98a8a 100644
--- a/source/module_hsolver/diago_bpcg.cpp
+++ b/source/module_hsolver/diago_bpcg.cpp
@@ -81,9 +81,14 @@ void DiagoBPCG<T, Device>::line_minimize(
     line_minimize_with_block_op()(grad_in.data<T>(), hgrad_in.data<T>(), psi_out.data<T>(), hpsi_out.data<T>(), this->n_basis, this->n_basis, this->n_band);
 }
 
+
 // Finally, the last two!
 template<typename T, typename Device>
-void DiagoBPCG<T, Device>::orth_cholesky(ct::Tensor& workspace_in, ct::Tensor& psi_out, ct::Tensor& hpsi_out, ct::Tensor& hsub_out)
+void DiagoBPCG<T, Device>::orth_cholesky(
+		ct::Tensor& workspace_in, 
+		ct::Tensor& psi_out, 
+		ct::Tensor& hpsi_out, 
+		ct::Tensor& hsub_out)
 {
     // hsub_out = psi_out * transc(psi_out)
     ct::EinsumOption option(
@@ -113,7 +118,17 @@ void DiagoBPCG<T, Device>::calc_grad_with_block(
         ct::Tensor& grad_out,
         ct::Tensor& grad_old_out)
 {
-    calc_grad_with_block_op()(prec_in.data<Real>(), err_out.data<Real>(), beta_out.data<Real>(), psi_in.data<T>(), hpsi_in.data<T>(), grad_out.data<T>(), grad_old_out.data<T>(), this->n_basis, this->n_basis, this->n_band);
+    calc_grad_with_block_op()(
+			prec_in.data<Real>(), 
+			err_out.data<Real>(), 
+			beta_out.data<Real>(), 
+			psi_in.data<T>(), 
+			hpsi_in.data<T>(), 
+			grad_out.data<T>(), 
+			grad_old_out.data<T>(), 
+			this->n_basis, 
+			this->n_basis, 
+			this->n_band);
 }
 
 template<typename T, typename Device>
@@ -136,6 +151,8 @@ void DiagoBPCG<T, Device>::orth_projection(
     option = ct::EinsumOption(
         /*conj_x=*/false, /*conj_y=*/false, /*alpha=*/-1.0, /*beta=*/1.0, /*Tensor out=*/&grad_out);
     grad_out = ct::op::einsum("ij,jk->ik", hsub_in, psi_in, option);
+
+    return;
 }
 
 template<typename T, typename Device>
@@ -149,6 +166,8 @@ void DiagoBPCG<T, Device>::rotate_wf(
     workspace_in = ct::op::einsum("ij,jk->ik", hsub_in, psi_out, option);
 
     syncmem_complex_op()(psi_out.template data<T>(), workspace_in.template data<T>(), this->n_band * this->n_basis);
+
+    return;
 }
 
 template<typename T, typename Device>
@@ -161,6 +180,8 @@ void DiagoBPCG<T, Device>::calc_hpsi_with_block(
     psi::Range all_bands_range(1, psi_in.get_current_k(), 0, psi_in.get_nbands() - 1);
     hpsi_info info(&psi_in, all_bands_range, hpsi_out.data<T>());
     hamilt_in->ops->hPsi(info);
+
+    return;
 }
 
 template<typename T, typename Device>
@@ -178,6 +199,8 @@ void DiagoBPCG<T, Device>::diag_hsub(
     hsub_out = ct::op::einsum("ij,kj->ik", psi_in, hpsi_in, option);
 
     ct::kernels::lapack_dnevd<T, ct_Device>()('V', 'U', hsub_out.data<T>(), this->n_band, eigenvalue_out.data<Real>());
+
+    return;
 }
 
 template<typename T, typename Device>
@@ -201,6 +224,8 @@ void DiagoBPCG<T, Device>::calc_hsub_with_block(
     // hpsi_out[n_basis, n_band] = psi_out[n_basis, n_band] x hsub_out[n_band, n_band]
     this->rotate_wf(hsub_out, psi_out, workspace_in);
     this->rotate_wf(hsub_out, hpsi_out, workspace_in);
+ 
+    return;
 }
 
 template<typename T, typename Device>
@@ -217,6 +242,8 @@ void DiagoBPCG<T, Device>::calc_hsub_with_block_exit(
     // inplace matmul to get the initial guessed wavefunction psi.
     // psi_out[n_basis, n_band] = psi_out[n_basis, n_band] x hsub_out[n_band, n_band]
     this->rotate_wf(hsub_out, psi_out, workspace_in);
+
+    return;
 }
 
 template<typename T, typename Device>
@@ -228,6 +255,7 @@ void DiagoBPCG<T, Device>::diag(
     const int current_scf_iter = hsolver::DiagoIterAssist<T, Device>::SCF_ITER;
     // Get the pointer of the input psi
     this->psi = std::move(ct::TensorMap(psi_in.get_pointer(), t_type, device_type, {this->n_band, this->n_basis}));
+
     // Update the precondition array
     this->calc_prec();
 
@@ -235,7 +263,9 @@ void DiagoBPCG<T, Device>::diag(
     this->calc_hsub_with_block(hamilt_in, psi_in, this->psi, this->hpsi, this->hsub, this->work, this->eigen);
 
     setmem_complex_op()(this->grad_old.template data<T>(), 0, this->n_basis * this->n_band);
+
     setmem_var_op()(this->beta.template data<Real>(), 1E+40, this->n_band);
+
     int ntry = 0;
     int max_iter = current_scf_iter > 1 ?
                    this->nline :
@@ -276,8 +306,12 @@ void DiagoBPCG<T, Device>::diag(
             this->calc_hsub_with_block(hamilt_in, psi_in, this->psi, this->hpsi, this->hsub, this->work, this->eigen);
         }
     } while (ntry < max_iter && this->test_error(this->err_st, this->all_band_cg_thr));
+
     this->calc_hsub_with_block_exit(this->psi, this->hpsi, this->hsub, this->work, this->eigen);
+
     syncmem_var_d2h_op()(eigenvalue_in, this->eigen.template data<Real>(), this->n_band);
+
+    return;
 }
 
 template class DiagoBPCG<std::complex<float>, psi::DEVICE_CPU>;
@@ -287,4 +321,4 @@ template class DiagoBPCG<std::complex<float>, psi::DEVICE_GPU>;
 template class DiagoBPCG<std::complex<double>, psi::DEVICE_GPU>;
 #endif
 
-} // namespace hsolver
\ No newline at end of file
+} // namespace hsolver
diff --git a/source/module_io/dos_nao.cpp b/source/module_io/dos_nao.cpp
index 4beaca4bba..1a36df5f12 100644
--- a/source/module_io/dos_nao.cpp
+++ b/source/module_io/dos_nao.cpp
@@ -7,7 +7,8 @@ namespace ModuleIO
 {
     /// @brief manege the output of dos in numerical atomic basis case
 /// @param[in] psi
-/// @param[in] uhm
+/// @param[in] lm
+/// @param[in] pv
 /// @param[in] ekb
 /// @param[in] wg
 /// @param[in] dos_edelta_ev
@@ -19,23 +20,25 @@ namespace ModuleIO
 /// @param[in] eferm
 /// @param[in] nbands
     template<typename T>
-    void out_dos_nao(
-        const psi::Psi<T>* psi,
-        LCAO_Hamilt& uhm,
-        const ModuleBase::matrix& ekb,
-        const ModuleBase::matrix& wg,
-        const double& dos_edelta_ev,
-        const double& dos_scale,
-        const double& dos_sigma,
-        const K_Vectors& kv,
-        const Parallel_Kpoints& Pkpoints,
-        const UnitCell& ucell,
-        const elecstate::efermi& eferm,
-        int nbands,
-        hamilt::Hamilt<T>* p_ham)
+		void out_dos_nao(
+				const psi::Psi<T>* psi,
+				LCAO_Matrix &lm,
+				const Parallel_Orbitals &pv,
+				const ModuleBase::matrix& ekb,
+				const ModuleBase::matrix& wg,
+				const double& dos_edelta_ev,
+				const double& dos_scale,
+				const double& dos_sigma,
+				const K_Vectors& kv,
+				const Parallel_Kpoints& Pkpoints,
+				const UnitCell& ucell,
+				const elecstate::efermi& eferm,
+				int nbands,
+				hamilt::Hamilt<T>* p_ham)
 {
-    ModuleBase::TITLE("Driver", "init");
-    write_dos_lcao(psi, uhm, ekb, wg, dos_edelta_ev, dos_scale, dos_sigma, kv, p_ham);
+    ModuleBase::TITLE("Module_IO", "out_dos_nao");
+
+    write_dos_lcao(psi, lm, pv, ekb, wg, dos_edelta_ev, dos_scale, dos_sigma, kv, p_ham);
 
     int nspin0 = (GlobalV::NSPIN == 2) ? 2 : 1;
     if (INPUT.out_dos == 3)
@@ -56,12 +59,14 @@ namespace ModuleIO
     {
         GlobalV::ofs_running << " Fermi energy (spin = 1) is " << eferm.ef_up << " Rydberg" << std::endl;
         GlobalV::ofs_running << " Fermi energy (spin = 2) is " << eferm.ef_dw << " Rydberg" << std::endl;
-    }
-    }
+	}
+}
 
-    template void out_dos_nao(const psi::Psi<double>* psi,
-        LCAO_Hamilt& uhm,
-        const ModuleBase::matrix& ekb,
+template void out_dos_nao(
+        const psi::Psi<double>* psi,
+		LCAO_Matrix &lm,
+		const Parallel_Orbitals &pv,
+		const ModuleBase::matrix& ekb,
         const ModuleBase::matrix& wg,
         const double& dos_edelta_ev,
         const double& dos_scale,
@@ -71,10 +76,13 @@ namespace ModuleIO
         const UnitCell& ucell,
         const elecstate::efermi& eferm,
         int nbands,
-        hamilt::Hamilt<double>* p_ham);
-    template void out_dos_nao(const psi::Psi<std::complex<double>>* psi,
-        LCAO_Hamilt& uhm,
-        const ModuleBase::matrix& ekb,
+		hamilt::Hamilt<double>* p_ham);
+
+template void out_dos_nao(
+        const psi::Psi<std::complex<double>>* psi,
+		LCAO_Matrix &lm,
+		const Parallel_Orbitals &pv,
+		const ModuleBase::matrix& ekb,
         const ModuleBase::matrix& wg,
         const double& dos_edelta_ev,
         const double& dos_scale,
@@ -85,4 +93,4 @@ namespace ModuleIO
         const elecstate::efermi& eferm,
         int nbands,
         hamilt::Hamilt<std::complex<double>>* p_ham);
-} // namespace ModuleIO
\ No newline at end of file
+} // namespace ModuleIO
diff --git a/source/module_io/dos_nao.h b/source/module_io/dos_nao.h
index dd86fc86ae..b40bc19f41 100644
--- a/source/module_io/dos_nao.h
+++ b/source/module_io/dos_nao.h
@@ -1,5 +1,6 @@
 #ifndef DOS_LCAO_H
 #define DOS_LCAO_H
+
 #include "module_io/nscf_fermi_surf.h"
 #include "module_io/write_dos_lcao.h"
 #include "module_elecstate/fp_energy.h"
@@ -8,20 +9,21 @@
 namespace ModuleIO
 {
     template<typename T>
-    void out_dos_nao(
-        const psi::Psi<T>* psi,
-                  LCAO_Hamilt& uhm,
-                  const ModuleBase::matrix& ekb,
-                  const ModuleBase::matrix& wg,
-                  const double& dos_edelta_ev,
-                  const double& dos_scale,
-                  const double& dos_sigma,
-                  const K_Vectors& kv,
-                  const Parallel_Kpoints& Pkpoints,
-                  const UnitCell& ucell,
-                  const elecstate::efermi& eferm,
-                  int nbands,
-        hamilt::Hamilt<T>* p_ham);
+		void out_dos_nao(
+				const psi::Psi<T>* psi,
+				LCAO_Matrix &lm,
+				const Parallel_Orbitals &pv,
+				const ModuleBase::matrix& ekb,
+				const ModuleBase::matrix& wg,
+				const double& dos_edelta_ev,
+				const double& dos_scale,
+				const double& dos_sigma,
+				const K_Vectors& kv,
+				const Parallel_Kpoints& Pkpoints,
+				const UnitCell& ucell,
+				const elecstate::efermi& eferm,
+				int nbands,
+				hamilt::Hamilt<T>* p_ham);
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/source/module_io/mulliken_charge.h b/source/module_io/mulliken_charge.h
index c1fa62cc1b..bc00dd6022 100644
--- a/source/module_io/mulliken_charge.h
+++ b/source/module_io/mulliken_charge.h
@@ -4,7 +4,6 @@
 #include "module_base/matrix.h"
 #include "module_base/complexmatrix.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h"
 #include "module_cell/klist.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
 #include "module_elecstate/elecstate.h"
@@ -14,7 +13,12 @@
 namespace ModuleIO
 {
     template<typename T>
-    void out_mulliken(const int& step, LCAO_Matrix* LM, const elecstate::ElecState* pelec, const K_Vectors& kv, hamilt::Hamilt<T>* ham_in);
+		void out_mulliken(
+				const int& step, 
+				LCAO_Matrix* LM, 
+				const elecstate::ElecState* pelec, 
+				const K_Vectors& kv, 
+				hamilt::Hamilt<T>* ham_in);
 
     /* 
     1. cal_mulliken:    for gamma-only
@@ -27,9 +31,12 @@ namespace ModuleIO
     */
 
     template<typename T>
-    ModuleBase::matrix cal_mulliken(const std::vector<std::vector<T>>& dm,
-        LCAO_Matrix* LM, const K_Vectors& kv, hamilt::Hamilt<T>* ham_in
-    );
+		ModuleBase::matrix cal_mulliken(
+				const std::vector<std::vector<T>>& dm,
+				LCAO_Matrix* LM, 
+				const K_Vectors& kv, 
+				hamilt::Hamilt<T>* ham_in
+				);
 
 
     std::vector<std::vector<std::vector<double>>> convert(const ModuleBase::matrix &orbMulP);
@@ -43,4 +50,4 @@ namespace ModuleIO
         return result;
     }
 }
-#endif
\ No newline at end of file
+#endif
diff --git a/source/module_io/output_mat_sparse.cpp b/source/module_io/output_mat_sparse.cpp
index d68e218eb2..401606e4a6 100644
--- a/source/module_io/output_mat_sparse.cpp
+++ b/source/module_io/output_mat_sparse.cpp
@@ -1,6 +1,7 @@
 #include "output_mat_sparse.h"
 #include "cal_r_overlap_R.h"
 #include "write_HS_R.h"
+#include "module_hamilt_pw/hamilt_pwdft/global.h" // for ucell
 
 namespace ModuleIO
 {
@@ -12,12 +13,12 @@ namespace ModuleIO
         int out_mat_t,
         int out_mat_r,
         int istep,
-        const ModuleBase::matrix& v_eff,
-        const Parallel_Orbitals& pv,
-        LCAO_Hamilt& UHM,
+        const ModuleBase::matrix &v_eff,
+        const Parallel_Orbitals &pv,
         LCAO_gen_fixedH &gen_h, // mohan add 2024-04-02
-        Gint_k& gint_k, // mohan add 2024-04-01
-        LCAO_Matrix& LM,
+        Gint_k &gint_k, // mohan add 2024-04-01
+        LCAO_Matrix &lm,
+        Grid_Driver &grid, // mohan add 2024-04-06
         const K_Vectors& kv,
         hamilt::Hamilt<T>* p_ham)
     : _out_mat_hsR(out_mat_hsR),
@@ -27,10 +28,10 @@ namespace ModuleIO
       _istep(istep),
       _v_eff(v_eff),
       _pv(pv),
-      _UHM(UHM),
       _gen_h(gen_h), // mohan add 2024-04-02
       _gint_k(gint_k), // mohan add 2024-04-01
-      _LM(LM),
+      _lm(lm),
+      _grid(grid), // mohan add 2024-04-06
       _kv(kv),
       _p_ham(p_ham)
 {
@@ -48,25 +49,38 @@ void Output_Mat_Sparse<std::complex<double>>::write(void)
     //! generate a file containing the Hamiltonian and S(overlap) matrices 
     if (_out_mat_hsR)
     {
-        output_HS_R(_istep, this->_v_eff, this->_UHM, _kv, _p_ham);
+		output_HSR(
+				_istep, 
+				this->_v_eff, 
+                this->_pv,
+				this->_lm, 
+                this->_grid,
+				_kv, 
+				_p_ham);
     }
 
     //! generate a file containing the kinetic energy matrix
     if (_out_mat_t)
     {
-        output_T_R(_istep, this->_UHM, this->_gen_h); // LiuXh add 2019-07-15
+		output_TR(
+				_istep, 
+                GlobalC::ucell,
+                this->_pv, 
+				this->_lm, 
+				this->_grid,
+				this->_gen_h); // LiuXh add 2019-07-15
     }
 
     //! generate a file containing the derivatives of the Hamiltonian matrix (in Ry/Bohr)
     if (_out_mat_dh)
     {
-		output_dH_R(
+		output_dHR(
 				_istep, 
 				this->_v_eff, 
-				this->_UHM, 
                 this->_gen_h,
 				this->_gint_k, // mohan add 2024-04-01
-				this->_LM,
+				this->_lm,
+                this->_grid, // mohan add 2024-04-06
 				_kv); // LiuXh add 2019-07-15
 	}
 
@@ -77,13 +91,15 @@ void Output_Mat_Sparse<std::complex<double>>::write(void)
         r_matrix.init(this->_pv);
         if (_out_mat_hsR)
         {
-            r_matrix.out_rR_other(_istep, this->_LM.output_R_coor);
+            r_matrix.out_rR_other(_istep, this->_lm.output_R_coor);
         }
         else
         {
             r_matrix.out_rR(_istep);
         }
     }
+
+    return;
 }
 
 template class Output_Mat_Sparse<double>;
diff --git a/source/module_io/output_mat_sparse.h b/source/module_io/output_mat_sparse.h
index c655226089..a8c023e872 100644
--- a/source/module_io/output_mat_sparse.h
+++ b/source/module_io/output_mat_sparse.h
@@ -2,8 +2,8 @@
 #define OUTPUT_MAT_SPARSE_H
 
 #include "module_basis/module_ao/parallel_orbitals.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.h"
 #include "module_hamilt_lcao/module_gint/gint_k.h"
 #include "module_hsolver/hsolver_lcao.h"
 #include "output_interface.h"
@@ -15,19 +15,20 @@ namespace ModuleIO
     class Output_Mat_Sparse : public Output_Interface
 {
   public:
-    Output_Mat_Sparse(int out_mat_hsR,
+    Output_Mat_Sparse(
+        int out_mat_hsR,
         int out_mat_dh,
         int out_mat_t,
         int out_mat_r,
         int istep,
-        const ModuleBase::matrix& v_eff,
-        const Parallel_Orbitals& pv,
-        LCAO_Hamilt& UHM,
+        const ModuleBase::matrix &v_eff,
+        const Parallel_Orbitals &pv,
         LCAO_gen_fixedH &gen_h, // mohan add 2024-04-02
         Gint_k &gint_k, // mohan add 2024-04-01
-        LCAO_Matrix& LM,
-        const K_Vectors& kv,
-        hamilt::Hamilt<T>* p_ham);
+        LCAO_Matrix &lm,
+        Grid_Driver &grid, // mohan add 2024-04-06
+        const K_Vectors &kv,
+        hamilt::Hamilt<T> *p_ham);
 
     void write() override;
 
@@ -51,13 +52,14 @@ namespace ModuleIO
 
     const Parallel_Orbitals& _pv;
 
-    LCAO_Hamilt& _UHM;
-
     LCAO_gen_fixedH& _gen_h; // mohan add 2024-04-02
 
     Gint_k& _gint_k; // mohan add 2024-04-01
 
-    LCAO_Matrix& _LM;
+    LCAO_Matrix& _lm;
+
+    // mohan fix bug 2024-04-07, a typical bug!!!
+    Grid_Driver& _grid; // mohan add 2024-04-06
 
     const K_Vectors& _kv;
 
diff --git a/source/module_io/td_current_io.h b/source/module_io/td_current_io.h
index 63643a179e..b699cf8c59 100644
--- a/source/module_io/td_current_io.h
+++ b/source/module_io/td_current_io.h
@@ -2,8 +2,8 @@
 #define TD_CURRENT_H
 
 #include "module_elecstate/module_dm/density_matrix.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.h"
 #include "module_elecstate/elecstate_lcao.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h"
 #include "module_psi/psi.h"
 
 namespace ModuleIO
diff --git a/source/module_io/write_HS_R.cpp b/source/module_io/write_HS_R.cpp
index 41b19f1d0f..093a21e320 100644
--- a/source/module_io/write_HS_R.cpp
+++ b/source/module_io/write_HS_R.cpp
@@ -1,39 +1,59 @@
-#include "write_HS_R.h"
-
 #include "module_base/timer.h"
+#include "write_HS_R.h"
 #include "write_HS_sparse.h"
-
-#include "module_hamilt_lcao/hamilt_lcaodft/sparse_format.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/spar_hsr.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/spar_dh.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/spar_st.h"
 
 
 // if 'binary=true', output binary file.
-// The 'sparse_threshold' is the accuracy of the sparse matrix. 
-// If the absolute value of the matrix element is less than or equal to the 'sparse_threshold', it will be ignored.
-void ModuleIO::output_HS_R(const int& istep,
+// The 'sparse_thr' is the accuracy of the sparse matrix. 
+// If the absolute value of the matrix element is less than or equal to the 'sparse_thr', it will be ignored.
+void ModuleIO::output_HSR(const int& istep,
                            const ModuleBase::matrix& v_eff,
-                           LCAO_Hamilt& uhm,
+                           const Parallel_Orbitals &pv,
+                           LCAO_Matrix& lm,
+                           Grid_Driver &grid, // mohan add 2024-04-06
                            const K_Vectors& kv,
                            hamilt::Hamilt<std::complex<double>>* p_ham,
                            const std::string& SR_filename,
                            const std::string& HR_filename_up,
                            const std::string HR_filename_down,
                            const bool& binary,
-                           const double& sparse_threshold)
+                           const double& sparse_thr)
 {
-    ModuleBase::TITLE("ModuleIO","output_HS_R"); 
-    ModuleBase::timer::tick("ModuleIO","output_HS_R"); 
+    ModuleBase::TITLE("ModuleIO","output_HSR"); 
+    ModuleBase::timer::tick("ModuleIO","output_HSR"); 
+
+    const int nspin = GlobalV::NSPIN;
 
-    if(GlobalV::NSPIN==1||GlobalV::NSPIN==4)
+    if(nspin==1||nspin==4)
     {
-        // mohan add 2024-04-02
         const int spin_now = 0;
         // jingan add 2021-6-4, modify 2021-12-2
-        uhm.cal_HSR_sparse(spin_now, sparse_threshold, kv.nmp, p_ham);
-    }
-    else if(GlobalV::NSPIN==2)
+		sparse_format::cal_HSR(
+				pv,
+				lm,
+                grid,
+				spin_now, 
+				sparse_thr, 
+				kv.nmp, 
+				p_ham);
+	}
+    else if(nspin==2)
     {
+        const int spin_now = GlobalV::CURRENT_SPIN;
+
         // save HR of current_spin first
-        uhm.cal_HSR_sparse(GlobalV::CURRENT_SPIN, sparse_threshold, kv.nmp, p_ham);
+		sparse_format::cal_HSR(
+				pv,
+				lm,
+				grid,
+				spin_now, 
+				sparse_thr, 
+				kv.nmp, 
+				p_ham);
+
         // cal HR of the other spin
         if(GlobalV::VL_IN_H)
         {
@@ -51,55 +71,78 @@ void ModuleIO::output_HS_R(const int& istep,
             p_ham->refresh();
             p_ham->updateHk(ik);
         }
-        uhm.cal_HSR_sparse(GlobalV::CURRENT_SPIN, sparse_threshold, kv.nmp, p_ham);
+
+		sparse_format::cal_HSR(
+				pv,
+				lm,
+				grid,
+				GlobalV::CURRENT_SPIN, 
+				sparse_thr, 
+				kv.nmp, 
+				p_ham);
     }
 
-    ModuleIO::save_HSR_sparse(istep, *uhm.LM, sparse_threshold, binary, SR_filename, HR_filename_up, HR_filename_down);
-    uhm.destroy_all_HSR_sparse();
+	ModuleIO::save_HSR_sparse(
+			istep, 
+			lm, 
+			sparse_thr, 
+			binary, 
+			SR_filename, 
+			HR_filename_up, 
+			HR_filename_down);
+
+	lm.destroy_HS_R_sparse();
 
-    ModuleBase::timer::tick("ModuleIO","output_HS_R"); 
+    ModuleBase::timer::tick("ModuleIO","output_HSR"); 
     return;
 }
 
-void ModuleIO::output_dH_R(const int& istep,
-                           const ModuleBase::matrix& v_eff,
-                           LCAO_Hamilt& uhm,
-                           LCAO_gen_fixedH& gen_h, // mohan add 2024-04-02
-                           Gint_k& gint_k,  // mohan add 2024-04-01
+
+void ModuleIO::output_dHR(const int &istep,
+                           const ModuleBase::matrix &v_eff,
+                           LCAO_gen_fixedH &gen_h, // mohan add 2024-04-02
+                           Gint_k &gint_k,  // mohan add 2024-04-01
                            LCAO_Matrix &lm,  // mohan add 2024-04-01
+                           Grid_Driver &grid, // mohan add 2024-04-06
                            const K_Vectors& kv,
-                           const bool& binary,
-                           const double& sparse_threshold)
+                           const bool &binary,
+                           const double &sparse_thr)
 {
-    ModuleBase::TITLE("ModuleIO","output_dH_R"); 
-    ModuleBase::timer::tick("ModuleIO","output_dH_R"); 
+    ModuleBase::TITLE("ModuleIO","output_dHR"); 
+    ModuleBase::timer::tick("ModuleIO","output_dHR"); 
 
     lm.Hloc_fixedR.resize(lm.ParaV->nnr);
+
     gint_k.allocate_pvdpR();
-    if(GlobalV::NSPIN==1||GlobalV::NSPIN==4)
+
+    const int nspin = GlobalV::NSPIN;
+
+    if(nspin==1||nspin==4)
     {
         // mohan add 2024-04-01
-        assert(GlobalV::CURRENT_SPIN==0);
+        const int cspin = GlobalV::CURRENT_SPIN; 
 
 		sparse_format::cal_dH(
                 lm,
+                grid,
 				gen_h,
-				GlobalV::CURRENT_SPIN, 
-				sparse_threshold, 
+				cspin, 
+				sparse_thr, 
 				gint_k);
 	}
-    else if(GlobalV::NSPIN==2)
+    else if(nspin==2)
     {
         for (int ik = 0; ik < kv.nks; ik++)
         {
             if (ik == 0 || ik == kv.nks / 2)
             {
-                if(GlobalV::NSPIN == 2)
+                if(nspin == 2)
                 {
                     GlobalV::CURRENT_SPIN = kv.isk[ik];
                 }
 
-                //note: some MPI process will not have grids when MPI cores is too many, v_eff in these processes are empty
+                // note: some MPI process will not have grids when MPI cores are too many, 
+                // v_eff in these processes are empty
                 const double* vr_eff1 = v_eff.nc * v_eff.nr > 0? &(v_eff(GlobalV::CURRENT_SPIN, 0)):nullptr;
                     
                 if(!GlobalV::GAMMA_ONLY_LOCAL)
@@ -111,66 +154,84 @@ void ModuleIO::output_dH_R(const int& istep,
                     }
                 }
 
+                const int cspin = GlobalV::CURRENT_SPIN;
+
 				sparse_format::cal_dH(
                         lm,
+                        grid,
 						gen_h,
-						GlobalV::CURRENT_SPIN, 
-						sparse_threshold, 
+						cspin, 
+						sparse_thr, 
 						gint_k);
 			}
         }
     }
 
     // mohan update 2024-04-01
-    ModuleIO::save_dH_sparse(istep, lm, sparse_threshold, binary);
-    uhm.destroy_dH_R_sparse();
+    ModuleIO::save_dH_sparse(istep, lm, sparse_thr, binary);
+
+    lm.destroy_dH_R_sparse();
 
     gint_k.destroy_pvdpR();
 
-    ModuleBase::timer::tick("ModuleIO","output_HS_R"); 
+    ModuleBase::timer::tick("ModuleIO","output_dHR"); 
     return;
 }
 
-void ModuleIO::output_S_R(
-    LCAO_Hamilt &uhm,
+void ModuleIO::output_SR(
+    Parallel_Orbitals &pv, 
+    LCAO_Matrix &lm,
+    Grid_Driver &grid,
     hamilt::Hamilt<std::complex<double>>* p_ham,
     const std::string &SR_filename,
     const bool &binary,
-    const double &sparse_threshold)
+    const double &sparse_thr)
 {
-    ModuleBase::TITLE("ModuleIO","output_S_R");
-    ModuleBase::timer::tick("ModuleIO","output_S_R"); 
+    ModuleBase::TITLE("ModuleIO","output_SR");
+    ModuleBase::timer::tick("ModuleIO","output_SR"); 
+
+	sparse_format::cal_SR(
+            pv,
+			lm.all_R_coor,
+			lm.SR_sparse,
+			lm.SR_soc_sparse,
+			grid,
+			sparse_thr, 
+			p_ham);
 
-    uhm.cal_SR_sparse(sparse_threshold, p_ham);
+    const int istep=0;
 
 	ModuleIO::save_sparse(
-			uhm.LM->SR_sparse, 
-			uhm.LM->all_R_coor,
-			sparse_threshold, 
+			lm.SR_sparse, 
+			lm.all_R_coor,
+			sparse_thr, 
 			binary, 
 			SR_filename,
-			*uhm.LM->ParaV, 
+			*lm.ParaV, 
 			"S", 
-			0
+			istep
 			);
 
-    uhm.destroy_all_HSR_sparse();
+    lm.destroy_HS_R_sparse();
 
-    ModuleBase::timer::tick("ModuleIO","output_S_R");
+    ModuleBase::timer::tick("ModuleIO","output_SR");
     return;
 }
 
-void ModuleIO::output_T_R(
+void ModuleIO::output_TR(
     const int istep,
-    LCAO_Hamilt &uhm,
+    const UnitCell &ucell,
+    const Parallel_Orbitals &pv,
+    LCAO_Matrix &lm,
+    Grid_Driver &grid,
     LCAO_gen_fixedH &gen_h, // mohan add 2024-04-02
     const std::string &TR_filename,
     const bool &binary,
-    const double &sparse_threshold
+    const double &sparse_thr
 )
 {
-    ModuleBase::TITLE("ModuleIO","output_T_R");
-    ModuleBase::timer::tick("ModuleIO","output_T_R"); 
+    ModuleBase::TITLE("ModuleIO","output_TR");
+    ModuleBase::timer::tick("ModuleIO","output_TR"); 
 
     std::stringstream sst;
     if(GlobalV::CALCULATION == "md" && !GlobalV::out_app_flag)
@@ -182,21 +243,27 @@ void ModuleIO::output_T_R(
         sst << GlobalV::global_out_dir << TR_filename;
     }
 
-    uhm.cal_TR_sparse(gen_h, sparse_threshold);
+	sparse_format::cal_TR(
+			ucell,
+			pv,
+			lm,
+			grid,
+			gen_h, 
+			sparse_thr);
 
 	ModuleIO::save_sparse(
-			uhm.LM->TR_sparse, 
-			uhm.LM->all_R_coor,
-			sparse_threshold, 
+			lm.TR_sparse, 
+			lm.all_R_coor,
+			sparse_thr, 
 			binary, 
 		    sst.str().c_str(),
-			*uhm.LM->ParaV, 
+			*(lm.ParaV), 
 			"T", 
 			istep
 			);
 
-    uhm.destroy_TR_sparse();
+    lm.destroy_T_R_sparse();
 
-    ModuleBase::timer::tick("ModuleIO","output_T_R");
+    ModuleBase::timer::tick("ModuleIO","output_TR");
     return;
 }
diff --git a/source/module_io/write_HS_R.h b/source/module_io/write_HS_R.h
index b61b3daf7f..69c2a26b88 100644
--- a/source/module_io/write_HS_R.h
+++ b/source/module_io/write_HS_R.h
@@ -4,14 +4,18 @@
 #include "module_base/matrix.h"
 #include "module_cell/klist.h"
 #include "module_hamilt_general/hamilt.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_gen_fixedH.h"
+#include "module_hamilt_lcao/module_gint/gint_k.h"
 
 namespace ModuleIO
 {
-        void output_HS_R(
+        void output_HSR(
             const int &istep,
             const ModuleBase::matrix& v_eff,
-            LCAO_Hamilt &UHM,
+			const Parallel_Orbitals &pv,
+			LCAO_Matrix &lm,
+            Grid_Driver &grid, // mohan add 2024-04-06
             const K_Vectors& kv,
             hamilt::Hamilt<std::complex<double>>* p_ham,
             const std::string& SR_filename = "data-SR-sparse_SPIN0.csr",
@@ -20,27 +24,32 @@ namespace ModuleIO
             const bool& binary = false,
             const double& sparse_threshold = 1e-10); //LiuXh add 2019-07-15, modify in 2021-12-3
 
-        void output_dH_R(
+        void output_dHR(
             const int &istep,
 			const ModuleBase::matrix& v_eff,
-			LCAO_Hamilt &uhm,
 			LCAO_gen_fixedH& gen_h, // mohan add 2024-04-02
 			Gint_k& gint_k,  // mohan add 2024-04-01
 			LCAO_Matrix &lm,  // mohan add 2024-04-01
+            Grid_Driver &grid, // mohan add 2024-04-06
             const K_Vectors& kv,
             const bool& binary = false,
             const double& sparse_threshold = 1e-10);
 
-        void output_T_R(
-            const int istep,
-            LCAO_Hamilt &UHM,
+        void output_TR(
+			const int istep,
+			const UnitCell &ucell,
+            const Parallel_Orbitals &pv,
+			LCAO_Matrix &lm,
+            Grid_Driver &grid,
             LCAO_gen_fixedH &gen_h, // mohan add 2024-04-02
             const std::string& TR_filename = "data-TR-sparse_SPIN0.csr",
             const bool& binary = false,
             const double& sparse_threshold = 1e-10);
 
-        void output_S_R(
-            LCAO_Hamilt &UHM,
+        void output_SR(
+            Parallel_Orbitals &pv, 
+            LCAO_Matrix &lm,
+            Grid_Driver &grid,
             hamilt::Hamilt<std::complex<double>>* p_ham,
             const std::string& SR_filename = "data-SR-sparse_SPIN0.csr",
             const bool& binary = false,
diff --git a/source/module_io/write_HS_sparse.cpp b/source/module_io/write_HS_sparse.cpp
index bfd534585a..aafe937d66 100644
--- a/source/module_io/write_HS_sparse.cpp
+++ b/source/module_io/write_HS_sparse.cpp
@@ -8,7 +8,7 @@
 void ModuleIO::save_HSR_sparse(
     const int &istep,
     LCAO_Matrix &lm,
-    const double& sparse_threshold,
+    const double& sparse_thr,
     const bool &binary,  
     const std::string &SR_filename, 
     const std::string &HR_filename_up, 
@@ -279,11 +279,11 @@ void ModuleIO::save_HSR_sparse(
             {
                 if (GlobalV::NSPIN != 4)
                 {
-                    output_single_R(g1[ispin], HR_sparse_ptr[ispin][R_coor], sparse_threshold, binary, *lm.ParaV);
+                    output_single_R(g1[ispin], HR_sparse_ptr[ispin][R_coor], sparse_thr, binary, *lm.ParaV);
                 }
                 else
                 {
-                    output_single_R(g1[ispin], HR_soc_sparse_ptr[R_coor], sparse_threshold, binary, *lm.ParaV);
+                    output_single_R(g1[ispin], HR_soc_sparse_ptr[R_coor], sparse_thr, binary, *lm.ParaV);
                 }
             }
         }
@@ -308,11 +308,11 @@ void ModuleIO::save_HSR_sparse(
         {
             if (GlobalV::NSPIN != 4)
             {
-                output_single_R(g2, SR_sparse_ptr[R_coor], sparse_threshold, binary, *lm.ParaV);
+                output_single_R(g2, SR_sparse_ptr[R_coor], sparse_thr, binary, *lm.ParaV);
             }
             else
             {
-                output_single_R(g2, SR_soc_sparse_ptr[R_coor], sparse_threshold, binary, *lm.ParaV);
+                output_single_R(g2, SR_soc_sparse_ptr[R_coor], sparse_thr, binary, *lm.ParaV);
             }
         }
 
@@ -322,7 +322,10 @@ void ModuleIO::save_HSR_sparse(
 
     if(GlobalV::DRANK==0) 
     {
-        for (int ispin = 0; ispin < spin_loop; ++ispin) g1[ispin].close();
+		for (int ispin = 0; ispin < spin_loop; ++ispin) 
+		{
+			g1[ispin].close();
+		}
         g2.close();
     }
     
@@ -342,7 +345,7 @@ void ModuleIO::save_HSR_sparse(
 void ModuleIO::save_dH_sparse(
     const int &istep,
     LCAO_Matrix &lm,
-    const double& sparse_threshold,
+    const double& sparse_thr,
     const bool &binary
 )
 {
@@ -619,33 +622,33 @@ void ModuleIO::save_dH_sparse(
             {
                 if (GlobalV::NSPIN != 4)
                 {
-                    output_single_R(g1x[ispin], dHRx_sparse_ptr[ispin][R_coor], sparse_threshold, binary, *lm.ParaV);
+                    output_single_R(g1x[ispin], dHRx_sparse_ptr[ispin][R_coor], sparse_thr, binary, *lm.ParaV);
                 }
                 else
                 {
-                    output_single_R(g1x[ispin], dHRx_soc_sparse_ptr[R_coor], sparse_threshold, binary, *lm.ParaV);
+                    output_single_R(g1x[ispin], dHRx_soc_sparse_ptr[R_coor], sparse_thr, binary, *lm.ParaV);
                 }
             }
             if (dHy_nonzero_num[ispin][count] > 0)
             {
                 if (GlobalV::NSPIN != 4)
                 {
-                    output_single_R(g1y[ispin], dHRy_sparse_ptr[ispin][R_coor], sparse_threshold, binary, *lm.ParaV);
+                    output_single_R(g1y[ispin], dHRy_sparse_ptr[ispin][R_coor], sparse_thr, binary, *lm.ParaV);
                 }
                 else
                 {
-                    output_single_R(g1y[ispin], dHRy_soc_sparse_ptr[R_coor], sparse_threshold, binary, *lm.ParaV);
+                    output_single_R(g1y[ispin], dHRy_soc_sparse_ptr[R_coor], sparse_thr, binary, *lm.ParaV);
                 }
             }
             if (dHz_nonzero_num[ispin][count] > 0)
             {
                 if (GlobalV::NSPIN != 4)
                 {
-                    output_single_R(g1z[ispin], dHRz_sparse_ptr[ispin][R_coor], sparse_threshold, binary, *lm.ParaV);
+                    output_single_R(g1z[ispin], dHRz_sparse_ptr[ispin][R_coor], sparse_thr, binary, *lm.ParaV);
                 }
                 else
                 {
-                    output_single_R(g1z[ispin], dHRz_soc_sparse_ptr[R_coor], sparse_threshold, binary, *lm.ParaV);
+                    output_single_R(g1z[ispin], dHRz_soc_sparse_ptr[R_coor], sparse_thr, binary, *lm.ParaV);
                 }
             }                        
         }
@@ -684,11 +687,12 @@ void ModuleIO::save_dH_sparse(
     return;
 }
 
+
 template<typename Tdata>
 void ModuleIO::save_sparse(
     const std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, Tdata>>>& smat,
     const std::set<Abfs::Vector3_Order<int>>& all_R_coor,
-    const double& sparse_threshold,
+    const double& sparse_thr,
     const bool& binary,
     const std::string& filename,
     const Parallel_Orbitals& pv,
@@ -791,7 +795,7 @@ void ModuleIO::save_sparse(
             }
         }
 
-        output_single_R(ofs, smat.at(R_coor), sparse_threshold, binary, pv, reduce);
+        output_single_R(ofs, smat.at(R_coor), sparse_thr, binary, pv, reduce);
         ++count;
     }
 	if (!reduce || GlobalV::DRANK == 0) 
diff --git a/source/module_io/write_HS_sparse.h b/source/module_io/write_HS_sparse.h
index 9411eeee75..afef84adf3 100644
--- a/source/module_io/write_HS_sparse.h
+++ b/source/module_io/write_HS_sparse.h
@@ -7,23 +7,24 @@
 
 #include <string>
 
-// mohan add this file 2010-09-10
 namespace ModuleIO
 {
+
     // jingan add 2021-6-4, modify 2021-12-2
     void save_HSR_sparse(
         const int &istep,
         LCAO_Matrix &lm,
-        const double& sparse_threshold,
+        const double& sparse_thr,
         const bool &binary,  
         const std::string &SR_filename, 
         const std::string &HR_filename_up, 
         const std::string &HR_filename_down
     );
+
     void save_dH_sparse(
         const int &istep,
         LCAO_Matrix &lm,
-        const double& sparse_threshold,
+        const double& sparse_thr,
         const bool& binary
     );
 
@@ -31,7 +32,7 @@ namespace ModuleIO
     void save_sparse(
         const std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, Tdata>>>& smat,
         const std::set<Abfs::Vector3_Order<int>>& all_R_coor,
-        const double& sparse_threshold,
+        const double& sparse_thr,
         const bool& binary,
         const std::string& filename,
         const Parallel_Orbitals& pv,
diff --git a/source/module_io/write_dos_lcao.cpp b/source/module_io/write_dos_lcao.cpp
index 8decbc9675..023cfa7636 100644
--- a/source/module_io/write_dos_lcao.cpp
+++ b/source/module_io/write_dos_lcao.cpp
@@ -31,7 +31,8 @@
 template <>
 void ModuleIO::write_dos_lcao(
     const psi::Psi<double>* psi,
-    LCAO_Hamilt& uhm,
+    LCAO_Matrix &lm,
+    const Parallel_Orbitals &pv, 
     const ModuleBase::matrix& ekb,
     const ModuleBase::matrix& wg,
     const double& dos_edelta_ev,
@@ -42,11 +43,11 @@ void ModuleIO::write_dos_lcao(
 {
     ModuleBase::TITLE("ModuleIO", "write_dos_lcao");
 
-    const Parallel_Orbitals* pv = uhm.LM->ParaV;
-
     int nspin0 = 1;
-    if (GlobalV::NSPIN == 2)
-        nspin0 = 2;
+	if (GlobalV::NSPIN == 2)
+	{
+		nspin0 = 2;
+	}
 
     // find the maximal and minimal band energy.
     double emax = ekb(0, 0);
@@ -67,10 +68,16 @@ void ModuleIO::write_dos_lcao(
 
     emax *= ModuleBase::Ry_to_eV;
     emin *= ModuleBase::Ry_to_eV;
-    if (INPUT.dos_setemax)
-        emax = INPUT.dos_emax_ev;
-    if (INPUT.dos_setemin)
-        emin = INPUT.dos_emin_ev;
+	if (INPUT.dos_setemax)
+	{
+		emax = INPUT.dos_emax_ev;
+	}
+
+	if (INPUT.dos_setemin)
+	{
+		emin = INPUT.dos_emin_ev;
+	}
+
     if (!INPUT.dos_setemax && !INPUT.dos_setemin)
     {
         // scale up a little bit so the end peaks are displaced better
@@ -117,7 +124,7 @@ void ModuleIO::write_dos_lcao(
 
         std::vector<ModuleBase::matrix> Mulk;
         Mulk.resize(1);
-        Mulk[0].create(pv->ncol, pv->nrow);
+        Mulk[0].create(pv.ncol, pv.nrow);
 
         psi->fix_k(is);
         const double* ppsi = psi->get_pointer();
@@ -146,31 +153,31 @@ void ModuleIO::write_dos_lcao(
                 &GlobalV::NLOCAL,
                 &GlobalV::NLOCAL,
                 &one_float,
-                uhm.LM->Sloc.data(),
+                lm.Sloc.data(),
                 &one_int,
                 &one_int,
-                pv->desc,
+                pv.desc,
                 ppsi,
                 &one_int,
                 &NB,
-                pv->desc,
+                pv.desc,
                 &one_int,
                 &zero_float,
                 Mulk[0].c,
                 &one_int,
                 &NB,
-                pv->desc,
+                pv.desc,
                 &one_int);
 #endif
 
             for (int j = 0; j < GlobalV::NLOCAL; ++j)
             {
 
-                if (pv->in_this_processor(j, i))
+                if (pv.in_this_processor(j, i))
                 {
 
-                    const int ir = pv->global2local_row(j);
-                    const int ic = pv->global2local_col(i);
+                    const int ir = pv.global2local_row(j);
+                    const int ic = pv.global2local_col(i);
                     waveg[j] = Mulk[0](ic, ir) * psi[0](ic, ir);
                     const double x = waveg[j].real();
                     BlasConnector::axpy(np, x, Gauss, 1, pdosk[is].c + j * pdosk[is].nc, 1);
@@ -333,10 +340,13 @@ void ModuleIO::write_dos_lcao(
 
     return;
 }
+
+
 template<>
 void ModuleIO::write_dos_lcao(
     const psi::Psi<std::complex<double>>* psi,
-    LCAO_Hamilt& uhm,
+    LCAO_Matrix &lm,
+    const Parallel_Orbitals &pv,
     const ModuleBase::matrix& ekb,
     const ModuleBase::matrix& wg,
     const double& dos_edelta_ev,
@@ -347,11 +357,11 @@ void ModuleIO::write_dos_lcao(
 {
     ModuleBase::TITLE("ModuleIO", "write_dos_lcao");
 
-    const Parallel_Orbitals* pv = uhm.LM->ParaV;
-
     int nspin0 = 1;
-    if (GlobalV::NSPIN == 2)
-        nspin0 = 2;
+	if (GlobalV::NSPIN == 2)
+	{
+		nspin0 = 2;
+	}
 
     // find the maximal and minimal band energy.
     double emax = ekb(0, 0);
@@ -372,10 +382,16 @@ void ModuleIO::write_dos_lcao(
 
     emax *= ModuleBase::Ry_to_eV;
     emin *= ModuleBase::Ry_to_eV;
-    if (INPUT.dos_setemax)
-        emax = INPUT.dos_emax_ev;
-    if (INPUT.dos_setemin)
-        emin = INPUT.dos_emin_ev;
+	if (INPUT.dos_setemax)
+	{
+		emax = INPUT.dos_emax_ev;
+	}
+
+	if (INPUT.dos_setemin)
+	{
+		emin = INPUT.dos_emin_ev;
+	}
+
     if (!INPUT.dos_setemax && !INPUT.dos_setemin)
     {
         // scale up a little bit so the end peaks are displaced better
@@ -389,6 +405,7 @@ void ModuleIO::write_dos_lcao(
     //  output the PDOS file.////qifeng-2019-01-21
     // 		atom_arrange::set_sr_NL();
     //		atom_arrange::search( GlobalV::SEARCH_RADIUS );//qifeng-2019-01-21
+
     const double de_ev = dos_edelta_ev;
 
     const int npoints = static_cast<int>(std::floor((emax - emin) / de_ev));
@@ -400,10 +417,11 @@ void ModuleIO::write_dos_lcao(
 
     for (int is = 0; is < nspin0; ++is)
     {
-
         pdosk[is].create(GlobalV::NLOCAL, np, true);
     }
+
     ModuleBase::matrix* pdos = new ModuleBase::matrix[nspin0];
+
     for (int is = 0; is < nspin0; ++is)
     {
         pdos[is].create(GlobalV::NLOCAL, np, true);
@@ -415,14 +433,13 @@ void ModuleIO::write_dos_lcao(
 
     std::complex<double>* waveg = new std::complex<double>[GlobalV::NLOCAL];
 
-    double* Gauss = new double[np];
+    double* Gauss = new double[np]();
 
     for (int is = 0; is < nspin0; ++is)
     {
-
             std::vector<ModuleBase::ComplexMatrix> Mulk;
             Mulk.resize(1);
-            Mulk[0].create(pv->ncol, pv->nrow);
+            Mulk[0].create(pv.ncol, pv.nrow);
 
             for (int ik = 0; ik < kv.nks; ik++)
             {
@@ -433,11 +450,13 @@ void ModuleIO::write_dos_lcao(
                     // the target matrix is LM->Sloc2 with collumn-major
                     if (GlobalV::NSPIN == 4)
                     {
-                        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(ik, uhm.LM, 1);
+                        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(
+                         ik, &lm, 1);
                     }
                     else
                     {
-                        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->updateSk(ik, uhm.LM, 1);
+                        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->updateSk(
+                         ik, &lm, 1);
                     }
 
                     psi->fix_k(ik);
@@ -475,31 +494,31 @@ void ModuleIO::write_dos_lcao(
                             &GlobalV::NLOCAL,
                             &GlobalV::NLOCAL,
                             &one_float[0],
-                            uhm.LM->Sloc2.data(),
+                            lm.Sloc2.data(),
                             &one_int,
                             &one_int,
-                            pv->desc,
+                            pv.desc,
                             p_dwfc,
                             &one_int,
                             &NB,
-                            pv->desc,
+                            pv.desc,
                             &one_int,
                             &zero_float[0],
                             Mulk[0].c,
                             &one_int,
                             &NB,
-                            pv->desc,
+                            pv.desc,
                             &one_int);
 #endif
 
                         for (int j = 0; j < GlobalV::NLOCAL; ++j)
                         {
 
-                            if (pv->in_this_processor(j, i))
+                            if (pv.in_this_processor(j, i))
                             {
 
-                                const int ir = pv->global2local_row(j);
-                                const int ic = pv->global2local_col(i);
+                                const int ir = pv.global2local_row(j);
+                                const int ic = pv.global2local_col(i);
 
                                 waveg[j] = Mulk[0](ic, ir) * psi[0](ic, ir);
                                 const double x = waveg[j].real();
@@ -519,6 +538,7 @@ void ModuleIO::write_dos_lcao(
     delete[] pdosk;
     delete[] waveg;
     delete[] Gauss;
+
     if (GlobalV::MY_RANK == 0)
     {
         {
@@ -568,10 +588,14 @@ void ModuleIO::write_dos_lcao(
 
             out << "<pdos>" << std::endl;
             out << "<nspin>" << GlobalV::NSPIN << "</nspin>" << std::endl;
-            if (GlobalV::NSPIN == 4)
-                out << "<norbitals>" << std::setw(2) << GlobalV::NLOCAL / 2 << "</norbitals>" << std::endl;
-            else
-                out << "<norbitals>" << std::setw(2) << GlobalV::NLOCAL << "</norbitals>" << std::endl;
+			if (GlobalV::NSPIN == 4)
+			{
+				out << "<norbitals>" << std::setw(2) << GlobalV::NLOCAL / 2 << "</norbitals>" << std::endl;
+			}
+			else
+			{
+				out << "<norbitals>" << std::setw(2) << GlobalV::NLOCAL << "</norbitals>" << std::endl;
+			}
             out << "<energy_values units=\"eV\">" << std::endl;
 
             for (int n = 0; n < npoints; ++n)
@@ -608,7 +632,6 @@ void ModuleIO::write_dos_lcao(
                     {
                         for (int n = 0; n < npoints; ++n)
                         {
-
                             out << std::setw(13) << pdos[0](w, n) << std::endl;
                         } // n
                     }
diff --git a/source/module_io/write_dos_lcao.h b/source/module_io/write_dos_lcao.h
index 31aeb781ac..3abdb16178 100644
--- a/source/module_io/write_dos_lcao.h
+++ b/source/module_io/write_dos_lcao.h
@@ -1,10 +1,12 @@
 #ifndef WRITE_DOS_LCAO_H
 #define WRITE_DOS_LCAO_H
+
 #include "module_base/matrix.h"
 #include "module_cell/klist.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h"
 #include "module_psi/psi.h"
 #include "module_hamilt_general/hamilt.h"
+#include "module_basis/module_ao/parallel_orbitals.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
 
 namespace ModuleIO
 {
@@ -12,7 +14,8 @@ namespace ModuleIO
     template <typename T>
     void write_dos_lcao(
         const psi::Psi<T>* psi,
-        LCAO_Hamilt& uhm,
+        LCAO_Matrix &lm,
+        const Parallel_Orbitals &pv, 
         const ModuleBase::matrix& ekb,
         const ModuleBase::matrix& wg,
         const double& dos_edelta_ev,
diff --git a/source/module_io/write_proj_band_lcao.cpp b/source/module_io/write_proj_band_lcao.cpp
index 71b37de8e0..2b31f50535 100644
--- a/source/module_io/write_proj_band_lcao.cpp
+++ b/source/module_io/write_proj_band_lcao.cpp
@@ -11,7 +11,7 @@
 template<>
 void ModuleIO::write_proj_band_lcao(
     const psi::Psi<double>* psi,
-    LCAO_Hamilt& uhm,
+    LCAO_Matrix& lm,
     const elecstate::ElecState* pelec,
     const K_Vectors& kv,
     const UnitCell &ucell,
@@ -20,7 +20,7 @@ void ModuleIO::write_proj_band_lcao(
     ModuleBase::TITLE("ModuleIO", "write_proj_band_lcao");
     ModuleBase::timer::tick("ModuleIO", "write_proj_band_lcao");
 
-    const Parallel_Orbitals* pv = uhm.LM->ParaV;
+    const Parallel_Orbitals* pv = lm.ParaV;
 
     int nspin0 = 1;
     if (GlobalV::NSPIN == 2)
@@ -61,7 +61,7 @@ void ModuleIO::write_proj_band_lcao(
                 &GlobalV::NLOCAL,
                 &GlobalV::NLOCAL,
                 &one_float,
-                uhm.LM->Sloc.data(),
+                lm.Sloc.data(),
                 &one_int,
                 &one_int,
                 pv->desc,
@@ -166,7 +166,7 @@ void ModuleIO::write_proj_band_lcao(
 template<>
 void ModuleIO::write_proj_band_lcao(
     const psi::Psi<std::complex<double>>* psi,
-    LCAO_Hamilt& uhm,
+    LCAO_Matrix& lm,
     const elecstate::ElecState* pelec,
     const K_Vectors& kv,
     const UnitCell& ucell,
@@ -175,7 +175,7 @@ void ModuleIO::write_proj_band_lcao(
     ModuleBase::TITLE("ModuleIO", "write_proj_band_lcao");
     ModuleBase::timer::tick("ModuleIO", "write_proj_band_lcao");
 
-    const Parallel_Orbitals* pv = uhm.LM->ParaV;
+    const Parallel_Orbitals* pv = lm.ParaV;
 
     int nspin0 = 1;
     if (GlobalV::NSPIN == 2)
@@ -210,12 +210,15 @@ void ModuleIO::write_proj_band_lcao(
                     // the target matrix is LM->Sloc2 with collumn-major
                     if (GlobalV::NSPIN == 4)
                     {
-                        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(ik, uhm.LM, 1);
+                        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(
+                          ik, &lm, 1);
                     }
                     else
                     {
-                        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->updateSk(ik, uhm.LM, 1);
+                        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->updateSk(
+                          ik, &lm, 1);
                     }
+
                     // calculate Mulk
                     psi->fix_k(ik);
                     psi::Psi<std::complex<double>> Dwfc(psi[0], 1);
@@ -229,16 +232,16 @@ void ModuleIO::write_proj_band_lcao(
                     {
                         const int NB = i + 1;
 
-                        const double one_float[2] = { 1.0, 0.0 }, zero_float[2] = { 0.0, 0.0 };
+                        const double one_float[2] = { 1.0, 0.0 };
+                        const double zero_float[2] = { 0.0, 0.0 };
                         const int one_int = 1;
-                        //   const int two_int=2;
-                        const char T_char = 'T'; // N_char='N',U_char='U'
+                        const char T_char = 'T';
 #ifdef __MPI
                         pzgemv_(&T_char,
                             &GlobalV::NLOCAL,
                             &GlobalV::NLOCAL,
                             &one_float[0],
-                            uhm.LM->Sloc2.data(),
+                            lm.Sloc2.data(),
                             &one_int,
                             &one_int,
                             pv->desc,
@@ -292,14 +295,16 @@ void ModuleIO::write_proj_band_lcao(
             {
                 out << "<norbitals>" << std::setw(2) << GlobalV::NLOCAL << "</norbitals>" << std::endl;
             }
+
             out << "<band_structure nkpoints=\"" << nks << "\" nbands=\"" << GlobalV::NBANDS << "\" units=\"eV\">"
                 << std::endl;
-                for (int ik = 0; ik < nks; ik++)
-                {
-                    for (int ib = 0; ib < GlobalV::NBANDS; ib++)
-                        out << " " << (pelec->ekb(ik + is * nks, ib)) * ModuleBase::Ry_to_eV;
-                    out << std::endl;
-                }
+
+			for (int ik = 0; ik < nks; ik++)
+			{
+				for (int ib = 0; ib < GlobalV::NBANDS; ib++)
+					out << " " << (pelec->ekb(ik + is * nks, ib)) * ModuleBase::Ry_to_eV;
+				out << std::endl;
+			}
             out << "</band_structure>" << std::endl;
 
             for (int i = 0; i < ucell.nat; i++)
@@ -325,24 +330,28 @@ void ModuleIO::write_proj_band_lcao(
                     out << std::setw(2) << "z=\"" << std::setw(40) << N1 + 1 << "\"" << std::endl;
                     out << ">" << std::endl;
                     out << "<data>" << std::endl;
-                        for (int ik = 0; ik < nks; ik++)
-                        {
-                            for (int ib = 0; ib < GlobalV::NBANDS; ib++)
-                            {
-                                if (GlobalV::NSPIN == 1)
-                                    out << std::setw(13) << weight(ik, ib * GlobalV::NLOCAL + w);
-                                else if (GlobalV::NSPIN == 2)
-                                    out << std::setw(13) << weight(ik + nks * is, ib * GlobalV::NLOCAL + w);
-                                else if (GlobalV::NSPIN == 4)
-                                {
-                                    int w0 = w - s0;
-                                    out << std::setw(13)
-                                        << weight(ik, ib * GlobalV::NLOCAL + s0 + 2 * w0)
-                                        + weight(ik, ib * GlobalV::NLOCAL + s0 + 2 * w0 + 1);
-                                }
-                            }
-                            out << std::endl;
-                        }
+					for (int ik = 0; ik < nks; ik++)
+					{
+						for (int ib = 0; ib < GlobalV::NBANDS; ib++)
+						{
+							if (GlobalV::NSPIN == 1)
+							{
+								out << std::setw(13) << weight(ik, ib * GlobalV::NLOCAL + w);
+							}
+							else if (GlobalV::NSPIN == 2)
+							{
+								out << std::setw(13) << weight(ik + nks * is, ib * GlobalV::NLOCAL + w);
+							}
+							else if (GlobalV::NSPIN == 4)
+							{
+								int w0 = w - s0;
+								out << std::setw(13)
+									<< weight(ik, ib * GlobalV::NLOCAL + s0 + 2 * w0)
+									+ weight(ik, ib * GlobalV::NLOCAL + s0 + 2 * w0 + 1);
+							}
+						}
+						out << std::endl;
+					}
                     out << "</data>" << std::endl;
                     out << "</orbital>" << std::endl;
                 } // j
diff --git a/source/module_io/write_proj_band_lcao.h b/source/module_io/write_proj_band_lcao.h
index 5fdb486da2..1da65b6169 100644
--- a/source/module_io/write_proj_band_lcao.h
+++ b/source/module_io/write_proj_band_lcao.h
@@ -5,7 +5,7 @@
 #include "module_cell/module_neighbor/sltk_grid_driver.h"
 #include "module_cell/unitcell.h"
 #include "module_elecstate/elecstate.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_hamilt.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
 #include "module_psi/psi.h"
 #include "module_hamilt_general/hamilt.h"
 
@@ -14,7 +14,7 @@ namespace ModuleIO
     template <typename TK>
     void write_proj_band_lcao(
         const psi::Psi<TK>* psi,
-		LCAO_Hamilt& uhm,
+		LCAO_Matrix& lm,
 		const elecstate::ElecState* pelec,
 		const K_Vectors& kv,
 		const UnitCell &ucell,