forked from abacusmodeling/abacus-develop
-
Notifications
You must be signed in to change notification settings - Fork 134
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Perf: split code_gen.cpp to reduce compilation time (#4210)
* split code_gen.cpp to reduce compilation time
* alter the file location of code_gen_*.cu
* fix an error in CMakeLists.txt
- Loading branch information
Showing
20 changed files
with
1,647 additions
and
719 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
473 changes: 473 additions & 0 deletions
473
source/module_hamilt_lcao/module_gint/kernels/cuda/code_gen.cuh
Large diffs are not rendered by default.
Oops, something went wrong.
48 changes: 48 additions & 0 deletions
48
source/module_hamilt_lcao/module_gint/kernels/cuda/code_gen_00.cu
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#include "vbatch_matrix_mul.cuh" | ||
|
||
// code_gen_00.cu -- explicit instantiations of gemm_time_measure<double, ...>.
//
// This translation unit is one slice of the instantiation list that the commit
// split out of code_gen.cpp to reduce compilation time. Each line below forces
// the compiler to emit one specialization of the function template
// gemm_time_measure, which is expected to be declared by "vbatch_matrix_mul.cuh".
//
// NOTE(review): the nine integers after `double` are forwarded verbatim as the
// template's non-type arguments; their individual meaning is not visible from
// this file -- confirm against the template declaration before editing them.
//
// GINT_INSTANTIATE_GEMM_TIME_MEASURE(...) expands to
//   template void gemm_time_measure<double, ...>(<fixed 18-parameter list>)
// so every entry shares one spelling of the function signature.
#define GINT_INSTANTIATE_GEMM_TIME_MEASURE(...)                                \
    template void gemm_time_measure<double, __VA_ARGS__>(                      \
        int, int, int*, int*, int*,                                            \
        double**, int*, double**, int*, double**, int*,                        \
        int, cudaStream_t, float&, matrix_multiple_func_type&,                 \
        double*, double*, double*)

GINT_INSTANTIATE_GEMM_TIME_MEASURE(2, 16, 16, 32, 2, 2, 16, 2, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(2, 16, 16, 32, 4, 2, 16, 2, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(2, 16, 16, 32, 6, 2, 16, 2, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(2, 16, 16, 32, 8, 2, 16, 2, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(2, 16, 16, 48, 2, 2, 16, 2, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(2, 16, 16, 48, 4, 2, 16, 2, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(2, 16, 16, 48, 6, 2, 16, 2, 16);

GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 8, 24, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 8, 24, 8, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 8, 24, 12, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 8, 32, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 8, 32, 8, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 8, 40, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 8, 40, 8, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 8, 48, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 8, 56, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 8, 64, 4, 4, 8, 4, 8);

GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 16, 16, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 16, 16, 8, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 16, 16, 12, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 16, 24, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 16, 24, 8, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 16, 32, 4, 4, 8, 4, 8);

// Keep the helper macro private to this instantiation list.
#undef GINT_INSTANTIATE_GEMM_TIME_MEASURE
|
48 changes: 48 additions & 0 deletions
48
source/module_hamilt_lcao/module_gint/kernels/cuda/code_gen_01.cu
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#include "vbatch_matrix_mul.cuh" | ||
|
||
// code_gen_01.cu -- explicit instantiations of gemm_time_measure<double, ...>.
//
// This translation unit is one slice of the instantiation list that the commit
// split out of code_gen.cpp to reduce compilation time. Each line below forces
// the compiler to emit one specialization of the function template
// gemm_time_measure, which is expected to be declared by "vbatch_matrix_mul.cuh".
//
// NOTE(review): the nine integers after `double` are forwarded verbatim as the
// template's non-type arguments; their individual meaning is not visible from
// this file -- confirm against the template declaration before editing them.
//
// GINT_INSTANTIATE_GEMM_TIME_MEASURE(...) expands to
//   template void gemm_time_measure<double, ...>(<fixed 18-parameter list>)
// so every entry shares one spelling of the function signature.
#define GINT_INSTANTIATE_GEMM_TIME_MEASURE(...)                                \
    template void gemm_time_measure<double, __VA_ARGS__>(                      \
        int, int, int*, int*, int*,                                            \
        double**, int*, double**, int*, double**, int*,                        \
        int, cudaStream_t, float&, matrix_multiple_func_type&,                 \
        double*, double*, double*)

GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 16, 32, 8, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 16, 40, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 16, 48, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 16, 56, 4, 4, 8, 4, 8);

GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 24, 16, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 24, 16, 8, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 24, 24, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 24, 24, 8, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 24, 32, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 24, 40, 4, 4, 8, 4, 8);

GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 32, 16, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 32, 16, 8, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 32, 24, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 40, 16, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 40, 24, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 48, 16, 4, 4, 8, 4, 8);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 8, 56, 16, 4, 4, 8, 4, 8);

GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 16, 16, 32, 4, 4, 16, 4, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 16, 16, 32, 8, 4, 16, 4, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 16, 16, 32, 12, 4, 16, 4, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 16, 16, 32, 16, 4, 16, 4, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 16, 16, 48, 4, 4, 16, 4, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 16, 16, 48, 8, 4, 16, 4, 16);

// Keep the helper macro private to this instantiation list.
#undef GINT_INSTANTIATE_GEMM_TIME_MEASURE
|
48 changes: 48 additions & 0 deletions
48
source/module_hamilt_lcao/module_gint/kernels/cuda/code_gen_02.cu
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#include "vbatch_matrix_mul.cuh" | ||
|
||
// code_gen_02.cu -- explicit instantiations of gemm_time_measure<double, ...>.
//
// This translation unit is one slice of the instantiation list that the commit
// split out of code_gen.cpp to reduce compilation time. Each line below forces
// the compiler to emit one specialization of the function template
// gemm_time_measure, which is expected to be declared by "vbatch_matrix_mul.cuh".
//
// NOTE(review): the nine integers after `double` are forwarded verbatim as the
// template's non-type arguments; their individual meaning is not visible from
// this file -- confirm against the template declaration before editing them.
//
// GINT_INSTANTIATE_GEMM_TIME_MEASURE(...) expands to
//   template void gemm_time_measure<double, ...>(<fixed 18-parameter list>)
// so every entry shares one spelling of the function signature.
#define GINT_INSTANTIATE_GEMM_TIME_MEASURE(...)                                \
    template void gemm_time_measure<double, __VA_ARGS__>(                      \
        int, int, int*, int*, int*,                                            \
        double**, int*, double**, int*, double**, int*,                        \
        int, cudaStream_t, float&, matrix_multiple_func_type&,                 \
        double*, double*, double*)

GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 16, 16, 48, 12, 4, 16, 4, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 16, 16, 64, 4, 4, 16, 4, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 16, 16, 64, 8, 4, 16, 4, 16);

GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 16, 32, 32, 4, 4, 16, 4, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 16, 32, 32, 8, 4, 16, 4, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 16, 32, 32, 12, 4, 16, 4, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 16, 32, 48, 4, 4, 16, 4, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 16, 32, 48, 8, 4, 16, 4, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 16, 48, 32, 4, 4, 16, 4, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 16, 48, 32, 8, 4, 16, 4, 16);

GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 24, 24, 48, 4, 4, 24, 4, 24);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 24, 24, 48, 8, 4, 24, 4, 24);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 24, 24, 48, 12, 4, 24, 4, 24);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 24, 48, 48, 4, 4, 24, 4, 24);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 24, 48, 48, 8, 4, 24, 4, 24);

GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 32, 32, 64, 4, 4, 32, 4, 32);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 32, 32, 64, 8, 4, 32, 4, 32);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 32, 32, 64, 12, 4, 32, 4, 32);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(4, 32, 32, 64, 16, 4, 32, 4, 32);

GINT_INSTANTIATE_GEMM_TIME_MEASURE(6, 16, 48, 32, 6, 6, 16, 6, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(6, 16, 48, 32, 12, 6, 16, 6, 16);
GINT_INSTANTIATE_GEMM_TIME_MEASURE(6, 16, 48, 48, 6, 6, 16, 6, 16);

GINT_INSTANTIATE_GEMM_TIME_MEASURE(8, 4, 16, 12, 8, 8, 4, 8, 4);

// Keep the helper macro private to this instantiation list.
#undef GINT_INSTANTIATE_GEMM_TIME_MEASURE
|
Oops, something went wrong.