diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 383178f656..16028d3846 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -2,9 +2,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/e CUDACPP_BUILDDIR='.' +make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,17 +15,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_22:08:17 +DATE: 2023-11-09_18:26:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6257s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6178s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6383s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6302s + [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1766s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1680s - [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1882s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1797s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.66E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4156s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3301s - [COUNTERS] Fortran MEs ( 1 ) : 0.0855s for 90112 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4280s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3388s + [COUNTERS] Fortran MEs ( 1 ) : 0.0892s for 90112 events => throughput is 1.01E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1878s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 8192 events => throughput is 1.30E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1893s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1830s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4131s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3430s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0701s for 90112 events => throughput is 1.29E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4144s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3434s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0710s for 90112 events => throughput is 1.27E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.227734e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.246747e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.242066e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.254814e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1813s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1774s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1854s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1814s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0040s for 8192 events => throughput is 2.06E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3861s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3422s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0439s for 90112 events => throughput is 2.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3872s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3433s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0440s for 90112 events => throughput is 2.05E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.002470e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.008841e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.006601e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.041604e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1828s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1796s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.57E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1821s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1790s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3731s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3402s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0329s for 90112 events => throughput is 2.74E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3764s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3431s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0333s for 90112 events => throughput is 2.71E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.620678e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.648221e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.819190e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.737599e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1815s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1784s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.71E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1828s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1800s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.94E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3723s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3407s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0316s for 90112 events => throughput is 2.85E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3726s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3409s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0317s for 90112 events => throughput is 2.84E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.820321e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.822405e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.842053e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.840653e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1819s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1785s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.38E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1827s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1792s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.34E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3824s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3420s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0404s for 90112 events => throughput is 2.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3845s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3458s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0387s for 90112 events => throughput is 2.33E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.075096e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.213684e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.166357e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.288308e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5934s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5929s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.63E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5941s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5936s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.56E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7863s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7814s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.81E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7643s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7594s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.85E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.141020e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.122558e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.873271e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.902108e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.990853e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.029032e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.361218e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.427964e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.939860e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.990174e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.944408e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.966232e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.975323e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.011562e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.124184e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.099952e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 4b3b0b9b07..bed8731e5c 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_22:08:35 +DATE: 2023-11-09_18:26:24 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6276s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6195s - [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6375s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6295s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1778s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1697s - [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1779s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1700s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4139s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3290s - [COUNTERS] Fortran MEs ( 1 ) : 0.0849s for 90112 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4168s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3310s + [COUNTERS] Fortran MEs ( 1 ) : 0.0858s for 90112 events => throughput is 1.05E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166087172673] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1874s + [COUNTERS] PROGRAM TOTAL : 0.1876s [COUNTERS] Fortran Overhead ( 0 ) : 0.1813s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 8192 events => throughput is 1.33E+06 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 8192 events => throughput is 1.31E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501907796603360E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4142s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3454s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0687s for 90112 events => throughput is 1.31E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4132s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3439s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0693s for 90112 events => throughput is 1.30E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.261327e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.290954e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.287607e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.269110e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165570339780] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1786s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1761s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.33E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1799s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1773s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.11E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905322826635E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3651s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3380s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 90112 events => throughput is 3.33E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3696s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3425s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 90112 events => throughput is 3.33E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.137840e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.211958e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.298087e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.331194e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165593922979] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1864s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1841s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.61E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1846s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1823s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.66E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905316084181E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3868s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3609s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0260s for 90112 events => throughput is 3.47E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3689s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3438s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 90112 events => throughput is 3.59E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.442542e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.583243e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.634986e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.664821e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165593922979] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1856s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1832s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.50E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1881s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1858s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.64E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905316084181E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3685s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3440s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0245s for 90112 events => throughput is 3.68E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4013s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3744s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0269s for 90112 events => throughput is 3.35E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.588607e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.708142e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.872180e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.716354e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166440400542] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1887s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1865s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.71E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1876s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1854s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.69E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501908978565555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3693s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3442s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 90112 events => throughput is 3.59E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3791s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3532s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0259s for 90112 events => throughput is 3.47E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.372399e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.388042e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.586770e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.799218e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166823487174] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5951s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5946s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.69E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5957s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5952s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.72E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501910542849674E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7616s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7570s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 90112 events => throughput is 1.96E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7596s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7551s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 90112 events => throughput is 1.97E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.577355e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.613080e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.822297e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.898284e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.937359e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.543811e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.046785e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.026187e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.102347e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.468953e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.203659e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.241582e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.365649e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.812787e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.422918e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.411277e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 9a947a36a5..8b8c11aaf5 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -17,13 +17,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_22:08:51 +DATE: 2023-11-09_18:26:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6267s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6186s - [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6293s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6211s + [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1781s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1703s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1828s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1742s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.52E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4162s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3310s - [COUNTERS] Fortran MEs ( 1 ) : 0.0853s for 90112 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4185s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3320s + [COUNTERS] Fortran MEs ( 1 ) : 0.0865s for 90112 events => throughput is 1.04E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,8 +134,8 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211734] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1898s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1832s + [COUNTERS] PROGRAM TOTAL : 0.1883s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1817s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4158s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3438s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0720s for 90112 events => throughput is 1.25E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4177s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3452s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0725s for 90112 events => throughput is 1.24E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.204267e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.192297e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.208788e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.206668e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211728] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1829s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1790s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0038s for 8192 events => throughput is 2.13E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1831s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1792s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.12E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3821s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3399s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0422s for 90112 events => throughput is 2.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3847s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3426s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0421s for 90112 events => throughput is 2.14E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.047978e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.077610e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.116427e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.127798e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1871s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1841s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.76E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1845s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.72E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3733s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3393s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0340s for 90112 events => throughput is 2.65E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3749s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3413s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0336s for 90112 events => throughput is 2.68E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.642107e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.567900e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.787956e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.786544e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1804s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1776s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.89E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1822s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1793s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.82E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,8 +395,8 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3708s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3395s + [COUNTERS] PROGRAM TOTAL : 0.3728s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3415s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0313s for 90112 events => throughput is 2.88E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.821887e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.787216e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.874115e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.802177e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1829s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1796s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.43E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1827s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1792s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.33E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3842s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3465s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0377s for 90112 events => throughput is 2.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3808s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3437s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0371s for 90112 events => throughput is 2.43E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.237740e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.306669e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.366800e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.302969e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169066587257] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5935s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5930s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.64E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5952s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5947s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.67E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919911173610E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7704s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7654s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.82E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7615s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7567s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.88E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.007927e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.094813e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.918411e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.912678e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.018629e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.000800e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.348012e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.334730e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.994146e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.018486e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.917104e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.914438e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.983673e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.024074e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.123333e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.129214e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 3e628018af..824a8e25d5 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_22:09:08 +DATE: 2023-11-09_18:26:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3517s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3111s - [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3548s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3140s + [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3086s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2681s - [COUNTERS] Fortran MEs ( 1 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3094s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2683s + [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6533s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2091s - [COUNTERS] Fortran MEs ( 1 ) : 0.4442s for 90112 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6956s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2429s + [COUNTERS] Fortran MEs ( 1 ) : 0.4528s for 90112 events => throughput is 1.99E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3456s + [COUNTERS] PROGRAM TOTAL : 0.3445s [COUNTERS] Fortran Overhead ( 0 ) : 0.3078s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0379s for 8192 events => throughput is 2.16E+05 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0367s for 8192 events => throughput is 2.23E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775372] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6716s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2645s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4071s for 90112 events => throughput is 2.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6787s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2659s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4128s for 90112 events => throughput is 2.18E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.224417e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.206364e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.212367e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.211188e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3132s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2919s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.83E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3133s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2921s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.88E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4780s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2422s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2359s for 90112 events => throughput is 3.82E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4919s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2565s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2354s for 90112 events => throughput is 3.83E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.777989e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.806213e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.740213e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.795645e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2955s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2823s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 8192 events => throughput is 6.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2981s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2850s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.23E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3915s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2460s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1455s for 90112 events => throughput is 6.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3832s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2385s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1448s for 90112 events => throughput is 6.22E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.030466e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.053490e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.192047e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.106690e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2946s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2826s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0119s for 8192 events => throughput is 6.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2943s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2825s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0117s for 8192 events => throughput is 6.97E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3659s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2362s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1297s for 90112 events => throughput is 6.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3653s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2365s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1287s for 90112 events => throughput is 7.00E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.841360e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.704382e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.816529e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.799597e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3142s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2951s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0191s for 8192 events => throughput is 4.29E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3082s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2885s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.15E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4624s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2517s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2106s for 90112 events => throughput is 4.28E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6624s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4291s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2333s for 90112 events => throughput is 3.86E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.955720e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.938387e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.094472e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.929754e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6940s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6935s + [COUNTERS] PROGRAM TOTAL : 0.6969s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6963s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.46E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7032s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6968s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 90112 events => throughput is 1.41E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6570s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6507s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.43E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.103744e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.071187e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.691695e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.692368e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.194593e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.183000e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.070229e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.074203e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.168601e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.195387e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.149757e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.150737e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.190999e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.203236e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.017633e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.040065e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 0321a276a0..6ff403b879 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -4,9 +4,9 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none - -make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 +make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z @@ -15,17 +15,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_22:09:34 +DATE: 2023-11-09_18:27:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3489s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3083s - [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3494s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3093s + [COUNTERS] Fortran MEs ( 1 ) : 0.0401s for 8192 events => throughput is 2.04E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3073s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2670s - [COUNTERS] Fortran MEs ( 1 ) : 0.0403s for 8192 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3068s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2663s + [COUNTERS] Fortran MEs ( 1 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6502s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2060s - [COUNTERS] Fortran MEs ( 1 ) : 0.4442s for 90112 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6536s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2070s + [COUNTERS] Fortran MEs ( 1 ) : 0.4466s for 90112 events => throughput is 2.02E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690706767555099] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3425s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3079s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0345s for 8192 events => throughput is 2.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3397s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3049s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0348s for 8192 events => throughput is 2.35E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782605295497] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6631s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2770s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3861s for 90112 events => throughput is 2.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6398s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2589s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3809s for 90112 events => throughput is 2.37E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.342613e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.342865e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.319125e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.331036e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690702885183541] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3002s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2858s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0145s for 8192 events => throughput is 5.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2992s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2845s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.59E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223778858016772] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3973s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2359s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1614s for 90112 events => throughput is 5.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4772s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3090s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1682s for 90112 events => throughput is 5.36E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.270911e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.225442e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.359921e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.299428e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694374060818] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2834s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2758s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3093s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3001s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 8192 events => throughput is 8.88E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775951815753] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3197s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2356s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0842s for 90112 events => throughput is 1.07E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3166s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2317s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0848s for 90112 events => throughput is 1.06E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.026437e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.025673e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.028771e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.017812e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,8 +362,8 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694374060818] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2894s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2821s + [COUNTERS] PROGRAM TOTAL : 0.2858s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2785s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775951815753] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3159s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2370s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0789s for 90112 events => throughput is 1.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3072s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2282s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0790s for 90112 events => throughput is 1.14E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.095999e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.097760e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.120004e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.119253e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690698914467276] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2909s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2810s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0098s for 8192 events => throughput is 8.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2907s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2807s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0099s for 8192 events => throughput is 8.25E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223780273983500] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4173s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2979s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1195s for 90112 events => throughput is 7.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3509s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2397s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1112s for 90112 events => throughput is 8.10E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.668644e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.884299e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.548978e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.701504e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690703397697980] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6943s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6937s + [COUNTERS] PROGRAM TOTAL : 0.6960s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6955s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.51E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223786763175951] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6513s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6459s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 90112 events => throughput is 1.67E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6624s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6571s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 90112 events => throughput is 1.68E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.266713e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.111635e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.234896e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.880409e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.830084e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.143607e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.762403e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.762374e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.776301e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.140173e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.872477e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.866583e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.374142e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.685718e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.426544e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.400545e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 8bacc65fe8..9b02995ca5 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,8 +1,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_22:09:59 +DATE: 2023-11-09_18:27:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3627s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3194s - [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3509s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3105s + [COUNTERS] Fortran MEs ( 1 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3074s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2660s - [COUNTERS] Fortran MEs ( 1 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3067s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2662s + [COUNTERS] Fortran MEs ( 1 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6907s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2365s - [COUNTERS] Fortran MEs ( 1 ) : 0.4542s for 90112 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6580s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2117s + [COUNTERS] Fortran MEs ( 1 ) : 0.4462s for 90112 events => throughput is 2.02E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3443s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3074s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0369s for 8192 events => throughput is 2.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3460s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3081s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0379s for 8192 events => throughput is 2.16E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6798s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2683s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4115s for 90112 events => throughput is 2.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6700s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2581s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4119s for 90112 events => throughput is 2.19E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.164831e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.182152e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.183670e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.183502e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3143s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2936s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0208s for 8192 events => throughput is 3.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3147s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2941s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0207s for 8192 events => throughput is 3.96E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4761s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2466s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2295s for 90112 events => throughput is 3.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4759s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2478s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2281s for 90112 events => throughput is 3.95E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.799865e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.820026e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.756525e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.775419e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2976s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2844s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 8192 events => throughput is 6.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2965s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2837s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0128s for 8192 events => throughput is 6.38E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4201s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2736s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1464s for 90112 events => throughput is 6.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3920s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2488s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1432s for 90112 events => throughput is 6.29E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.181937e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.159361e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.243573e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.220899e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2977s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2865s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0112s for 8192 events => throughput is 7.30E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3061s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2938s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.65E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3670s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2408s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1262s for 90112 events => throughput is 7.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3693s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2423s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1270s for 90112 events => throughput is 7.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.933959e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.912537e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.064349e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.069074e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3083s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2895s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0188s for 8192 events => throughput is 4.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3327s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3109s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 8192 events => throughput is 3.75E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4519s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2484s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2036s for 90112 events => throughput is 4.43E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4629s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2551s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2078s for 90112 events => throughput is 4.34E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.266660e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.077933e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.117226e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.997576e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708266690699] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6949s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6943s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.46E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6985s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6979s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.40E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782303744791] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6539s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6476s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.42E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6617s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6553s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 90112 events => throughput is 1.42E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.049281e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.060435e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.529307e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.608769e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.148817e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.186491e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.053163e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.059369e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.170472e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.182441e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.130394e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.136921e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.186789e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.174632e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.035076e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.949461e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 09e16e6057..241597d591 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_22:10:24 +DATE: 2023-11-09_18:28:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5436s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2280s - [COUNTERS] Fortran MEs ( 1 ) : 0.3156s for 8192 events => throughput is 2.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5556s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2379s + [COUNTERS] Fortran MEs ( 1 ) : 0.3178s for 8192 events => throughput is 2.58E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5326s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2186s - [COUNTERS] Fortran MEs ( 1 ) : 0.3141s for 8192 events => throughput is 2.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5351s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2203s + [COUNTERS] Fortran MEs ( 1 ) : 0.3148s for 8192 events => throughput is 2.60E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 4.9133s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4066s - [COUNTERS] Fortran MEs ( 1 ) : 3.5067s for 90112 events => throughput is 2.57E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.8579s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3886s + [COUNTERS] Fortran MEs ( 1 ) : 3.4692s for 90112 events => throughput is 2.60E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470791E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8544s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5319s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3225s for 8192 events => throughput is 2.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8596s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5355s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3241s for 8192 events => throughput is 2.53E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.3255s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7008s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6247s for 90112 events => throughput is 2.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2563s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6842s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5721s for 90112 events => throughput is 2.52E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.590377e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.570949e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.610150e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.596498e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470777E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5624s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3861s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1763s for 8192 events => throughput is 4.65E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5542s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3858s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1684s for 8192 events => throughput is 4.87E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.3972s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5470s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8502s for 90112 events => throughput is 4.87E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.5019s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5803s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9216s for 90112 events => throughput is 4.69E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.010592e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.985717e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.958333e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.959096e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3818s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2982s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0836s for 8192 events => throughput is 9.79E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3840s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3011s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0829s for 8192 events => throughput is 9.88E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3684s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4497s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9187s for 90112 events => throughput is 9.81E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3753s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4512s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9241s for 90112 events => throughput is 9.75E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.953639e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005162e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.002866e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.000723e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3650s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2906s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0744s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3672s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2918s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0754s for 8192 events => throughput is 1.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2634s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8222s for 90112 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2690s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4430s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8260s for 90112 events => throughput is 1.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.117525e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.111268e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.126876e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.117996e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4269s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3231s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1039s for 8192 events => throughput is 7.89E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4279s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3235s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1044s for 8192 events => throughput is 7.85E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6060s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4689s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1371s for 90112 events => throughput is 7.92E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6406s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4855s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1551s for 90112 events => throughput is 7.80E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.896705e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.832306e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.740238e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.896180e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6527s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6472s + [COUNTERS] PROGRAM TOTAL : 0.6558s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6503s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.50E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8560s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8329s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 90112 events => throughput is 3.90E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8300s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8072s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.95E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.624902e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.613028e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.902263e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.229609e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.850642e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.871226e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.238047e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.236452e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.868590e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.869896e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.248755e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.247810e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.862444e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.851703e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.745100e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.745705e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 1a98ebc0f5..9b1af7b411 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -2,9 +2,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' +make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_22:11:05 +DATE: 2023-11-09_18:28:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5362s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2200s - [COUNTERS] Fortran MEs ( 1 ) : 0.3162s for 8192 events => throughput is 2.59E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5377s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2218s + [COUNTERS] Fortran MEs ( 1 ) : 0.3159s for 8192 events => throughput is 2.59E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5340s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2185s - [COUNTERS] Fortran MEs ( 1 ) : 0.3154s for 8192 events => throughput is 2.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5364s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2203s + [COUNTERS] Fortran MEs ( 1 ) : 0.3161s for 8192 events => throughput is 2.59E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 4.8590s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3887s - [COUNTERS] Fortran MEs ( 1 ) : 3.4703s for 90112 events => throughput is 2.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.9162s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3985s + [COUNTERS] Fortran MEs ( 1 ) : 3.5176s for 90112 events => throughput is 2.56E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196349765248158E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8380s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5255s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3125s for 8192 events => throughput is 2.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8412s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5250s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3162s for 8192 events => throughput is 2.59E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310860767768514E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.1166s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6696s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4470s for 90112 events => throughput is 2.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1769s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6882s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4887s for 90112 events => throughput is 2.58E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.677117e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.661457e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.693750e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.666467e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196334183509370E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4030s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3096s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0934s for 8192 events => throughput is 8.77E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4080s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3132s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0948s for 8192 events => throughput is 8.64E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847547651041E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.4739s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4457s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0282s for 90112 events => throughput is 8.76E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.5043s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4696s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0347s for 90112 events => throughput is 8.71E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.839523e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.800531e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.853955e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.815957e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,8 +286,8 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196330801117323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3025s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2591s + [COUNTERS] PROGRAM TOTAL : 0.3077s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2643s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847326088065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8724s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4009s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4715s for 90112 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8998s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4206s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4793s for 90112 events => throughput is 1.88E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.919418e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.823286e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.922480e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.826868e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196330801117323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2944s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2562s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0382s for 8192 events => throughput is 2.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2983s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2592s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0391s for 8192 events => throughput is 2.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847326088065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8215s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3936s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4279s for 90112 events => throughput is 2.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8383s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4303s for 90112 events => throughput is 2.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.114883e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.101947e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.107711e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.126133e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196344079460428E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3218s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2710s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0508s for 8192 events => throughput is 1.61E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3220s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2717s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0503s for 8192 events => throughput is 1.63E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310857804286998E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.9668s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4146s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5522s for 90112 events => throughput is 1.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9888s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4251s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5637s for 90112 events => throughput is 1.60E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.619298e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.589248e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.625264e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.587181e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196349366365994E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6443s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6435s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 9.56E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6498s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6490s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.66E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310864949473968E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.7852s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7757s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0094s for 90112 events => throughput is 9.54E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8143s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8048s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 90112 events => throughput is 9.51E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.275339e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.303788e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.852966e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.857184e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.672301e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.727610e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.329588e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.358085e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.661199e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.712514e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.474053e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.447022e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.511679e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.573590e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.616407e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.621450e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index b41396f75b..e102a98f20 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none - +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_22:11:42 +DATE: 2023-11-09_18:29:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5361s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2201s - [COUNTERS] Fortran MEs ( 1 ) : 0.3160s for 8192 events => throughput is 2.59E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5406s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2214s + [COUNTERS] Fortran MEs ( 1 ) : 0.3192s for 8192 events => throughput is 2.57E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5352s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2185s - [COUNTERS] Fortran MEs ( 1 ) : 0.3167s for 8192 events => throughput is 2.59E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5369s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2199s + [COUNTERS] Fortran MEs ( 1 ) : 0.3170s for 8192 events => throughput is 2.58E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 4.8603s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3884s - [COUNTERS] Fortran MEs ( 1 ) : 3.4719s for 90112 events => throughput is 2.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.8531s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3845s + [COUNTERS] Fortran MEs ( 1 ) : 3.4687s for 90112 events => throughput is 2.60E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358763382007E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8721s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5420s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3301s for 8192 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8764s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5433s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3331s for 8192 events => throughput is 2.46E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872835011053E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.2894s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6845s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6049s for 90112 events => throughput is 2.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3597s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7144s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6453s for 90112 events => throughput is 2.47E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.562016e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.553245e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.546299e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.536593e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358804670396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5435s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3795s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1640s for 8192 events => throughput is 4.99E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5484s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3827s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1657s for 8192 events => throughput is 4.94E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872836789727E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.3591s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5386s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8206s for 90112 events => throughput is 4.95E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.3712s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5426s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8286s for 90112 events => throughput is 4.93E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.765208e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.047917e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.784106e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.047714e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4043s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3156s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0888s for 8192 events => throughput is 9.23E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3884s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3047s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0838s for 8192 events => throughput is 9.78E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3898s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4601s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9297s for 90112 events => throughput is 9.69E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3827s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4554s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9273s for 90112 events => throughput is 9.72E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.002689e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.985245e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.001815e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.974556e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3655s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2923s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0732s for 8192 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3689s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2951s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0738s for 8192 events => throughput is 1.11E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2429s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4365s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8064s for 90112 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2643s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4493s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8151s for 90112 events => throughput is 1.11E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.134514e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.067840e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.146843e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.069793e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358757578441E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4312s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3234s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1077s for 8192 events => throughput is 7.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4597s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3406s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1191s for 8192 events => throughput is 6.88E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872803699391E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6602s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4811s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1791s for 90112 events => throughput is 7.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7582s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5206s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2377s for 90112 events => throughput is 7.28E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.628154e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.675272e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.726777e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.626790e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358102981245E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6526s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6472s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6588s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6533s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.50E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872068634174E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8190s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7961s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0229s for 90112 events => throughput is 3.93E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8293s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8065s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.95E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.619555e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.635720e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.404025e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.120274e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.847979e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.835173e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.233328e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.231986e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.825056e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.818919e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.244373e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.242590e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.833245e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.805414e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.724277e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.724480e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index e6041006eb..408d8d380a 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -16,14 +16,14 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_22:12:23 +DATE: 2023-11-09_18:30:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3823s + [COUNTERS] PROGRAM TOTAL : 4.3928s [COUNTERS] Fortran Overhead ( 0 ) : 0.2780s - [COUNTERS] Fortran MEs ( 1 ) : 4.1043s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] Fortran MEs ( 1 ) : 4.1147s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3581s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2728s - [COUNTERS] Fortran MEs ( 1 ) : 4.0853s for 8192 events => throughput is 2.01E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3846s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2715s + [COUNTERS] Fortran MEs ( 1 ) : 4.1131s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 47.0624s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8747s - [COUNTERS] Fortran MEs ( 1 ) : 45.1877s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.4210s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8888s + [COUNTERS] Fortran MEs ( 1 ) : 45.5321s for 90112 events => throughput is 1.98E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.6032s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3774s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2258s for 8192 events => throughput is 1.94E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.6565s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4044s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2521s for 8192 events => throughput is 1.93E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 52.5656s - [COUNTERS] Fortran Overhead ( 0 ) : 5.9752s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.5903s for 90112 events => throughput is 1.93E+03 events/s + [COUNTERS] PROGRAM TOTAL : 52.9600s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0482s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.9118s for 90112 events => throughput is 1.92E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.002618e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.992604e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.000666e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.989276e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352993E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.6983s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4517s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2466s for 8192 events => throughput is 3.65E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7458s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4719s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2739s for 8192 events => throughput is 3.60E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 29.0395s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1529s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.8866s for 90112 events => throughput is 3.62E+03 events/s + [COUNTERS] PROGRAM TOTAL : 29.7086s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1354s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.5732s for 90112 events => throughput is 3.52E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.775162e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.697279e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.752647e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.704506e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.2090s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2291s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9799s for 8192 events => throughput is 8.36E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.2161s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2329s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9832s for 8192 events => throughput is 8.33E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.6019s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8333s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.7686s for 90112 events => throughput is 8.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.6675s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8433s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.8242s for 90112 events => throughput is 8.33E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.622945e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.632389e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.637406e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.597678e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9647s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1065s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8583s for 8192 events => throughput is 9.54E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9728s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1137s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8591s for 8192 events => throughput is 9.54E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.2465s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7171s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5294s for 90112 events => throughput is 9.46E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.1508s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7103s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4405s for 90112 events => throughput is 9.55E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.867536e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.863291e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.834174e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.840135e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.4062s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3349s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0712s for 8192 events => throughput is 7.65E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5697s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4500s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1197s for 8192 events => throughput is 7.32E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 14.7127s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9424s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.7703s for 90112 events => throughput is 7.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.0835s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0464s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.0370s for 90112 events => throughput is 7.49E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.671946e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.677485e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.485706e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.683279e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.8073s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7752s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8101s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7773s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0327s for 8192 events => throughput is 2.50E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.7243s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3746s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3497s for 90112 events => throughput is 2.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7514s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3963s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3551s for 90112 events => throughput is 2.54E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.290435e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.285714e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.518069e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.505353e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.109074e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.109677e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.162766e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.147684e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.119359e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.113597e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.170946e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.164951e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.114486e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.106343e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.433160e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.432331e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index a18920ba3f..f4a809f68b 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_22:16:35 +DATE: 2023-11-09_18:34:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.4492s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2726s - [COUNTERS] Fortran MEs ( 1 ) : 4.1766s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3944s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2768s + [COUNTERS] Fortran MEs ( 1 ) : 4.1176s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3607s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2703s - [COUNTERS] Fortran MEs ( 1 ) : 4.0903s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5146s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2747s + [COUNTERS] Fortran MEs ( 1 ) : 4.2399s for 8192 events => throughput is 1.93E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 47.0727s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8744s - [COUNTERS] Fortran MEs ( 1 ) : 45.1984s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.3456s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8962s + [COUNTERS] Fortran MEs ( 1 ) : 45.4494s for 90112 events => throughput is 1.98E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396490802749E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.3702s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2240s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1462s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.3558s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2546s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1013s for 8192 events => throughput is 2.00E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774602344628E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 50.9666s - [COUNTERS] Fortran Overhead ( 0 ) : 5.8905s - [COUNTERS] CudaCpp MEs ( 2 ) : 45.0761s for 90112 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 51.2827s + [COUNTERS] Fortran Overhead ( 0 ) : 5.9515s + [COUNTERS] CudaCpp MEs ( 2 ) : 45.3313s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.075529e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.068073e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.074082e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.068719e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277389126121586E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.5244s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3710s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1534s for 8192 events => throughput is 7.10E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4998s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3795s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1203s for 8192 events => throughput is 7.31E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803771887543366E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 15.2999s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0272s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.2727s for 90112 events => throughput is 7.34E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.4928s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0115s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.4813s for 90112 events => throughput is 7.22E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.487987e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.470531e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.461964e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.461238e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390198115864E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.2522s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7532s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4990s for 8192 events => throughput is 1.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.2540s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7572s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4968s for 8192 events => throughput is 1.65E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774416711566E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.8843s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3862s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4981s for 90112 events => throughput is 1.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.8987s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3780s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.5207s for 90112 events => throughput is 1.63E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.703770e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.671559e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.715659e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.684139e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390198115864E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.1254s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6948s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4306s for 8192 events => throughput is 1.90E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.1397s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7044s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4354s for 8192 events => throughput is 1.88E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774416711566E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.0325s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2899s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.7425s for 90112 events => throughput is 1.90E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.1254s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3176s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.8078s for 90112 events => throughput is 1.87E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.946675e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.932083e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.957212e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934934e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396394633404E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.3221s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7944s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5278s for 8192 events => throughput is 1.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3342s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8031s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5312s for 8192 events => throughput is 1.54E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803777741065333E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 8.1973s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3930s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.8043s for 90112 events => throughput is 1.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.3073s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4189s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.8884s for 90112 events => throughput is 1.53E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.558982e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.547676e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.568288e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.546957e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277400478491260E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.7705s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7491s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.83E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7736s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7522s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0215s for 8192 events => throughput is 3.81E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803779990154892E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.5805s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3447s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2358s for 90112 events => throughput is 3.82E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5981s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3628s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2353s for 90112 events => throughput is 3.83E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.598757e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.602414e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.937809e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.925045e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.495923e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.484752e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.725491e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.656642e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.498449e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.490786e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.660457e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.725267e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.473649e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.471712e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.522099e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.530964e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 05db57554d..9bed8b02d9 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_22:19:53 +DATE: 2023-11-09_18:37:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3676s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2775s - [COUNTERS] Fortran MEs ( 1 ) : 4.0901s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3681s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2752s + [COUNTERS] Fortran MEs ( 1 ) : 4.0929s for 8192 events => throughput is 2.00E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.4195s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2705s - [COUNTERS] Fortran MEs ( 1 ) : 4.1489s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3422s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2703s + [COUNTERS] Fortran MEs ( 1 ) : 4.0719s for 8192 events => throughput is 2.01E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 47.1152s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8703s - [COUNTERS] Fortran MEs ( 1 ) : 45.2450s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.1722s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8864s + [COUNTERS] Fortran MEs ( 1 ) : 45.2857s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277432965013E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.7049s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4327s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2722s for 8192 events => throughput is 1.92E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.6914s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4356s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2558s for 8192 events => throughput is 1.92E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725813026109E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 53.0960s - [COUNTERS] Fortran Overhead ( 0 ) : 6.0891s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.0069s for 90112 events => throughput is 1.92E+03 events/s + [COUNTERS] PROGRAM TOTAL : 54.0099s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0604s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.9495s for 90112 events => throughput is 1.88E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.971437e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.955214e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.965809e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.962469e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277430934464E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.7042s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4800s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2242s for 8192 events => throughput is 3.68E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7696s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4653s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3043s for 8192 events => throughput is 3.56E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725816246317E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 28.5105s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0554s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.4551s for 90112 events => throughput is 3.68E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.7487s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0795s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.6692s for 90112 events => throughput is 3.65E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.800834e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.767280e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.788503e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.771152e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.1858s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2226s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9633s for 8192 events => throughput is 8.50E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.1933s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2225s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9708s for 8192 events => throughput is 8.44E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.5514s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8252s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.7262s for 90112 events => throughput is 8.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.6387s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8343s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.8044s for 90112 events => throughput is 8.34E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.756273e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.765902e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.759413e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.708316e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9510s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0980s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8530s for 8192 events => throughput is 9.60E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9610s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1075s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8536s for 8192 events => throughput is 9.60E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.1748s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7107s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.4641s for 90112 events => throughput is 9.52E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.2117s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7290s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4827s for 90112 events => throughput is 9.50E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.859146e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.837213e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.890303e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.813722e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.4412s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3447s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0965s for 8192 events => throughput is 7.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4206s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3396s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0809s for 8192 events => throughput is 7.58E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 14.7703s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9437s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.8266s for 90112 events => throughput is 7.62E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.8540s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9517s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9023s for 90112 events => throughput is 7.57E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.668015e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.664729e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.694387e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.661148e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277293084707E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.8048s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7728s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0320s for 8192 events => throughput is 2.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8068s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7745s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0323s for 8192 events => throughput is 2.54E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725738731039E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.7246s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3746s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3499s for 90112 events => throughput is 2.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7640s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4053s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3587s for 90112 events => throughput is 2.51E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.280245e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.297023e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.525176e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.536170e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.116522e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.107408e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.157499e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.153471e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.119956e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.118088e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.172287e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.176343e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.122850e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.120562e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.440669e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.436751e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index b972c40fa5..635bc8aab0 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -3,9 +3,9 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_22:25:30 +DATE: 2023-11-09_18:43:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435802E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 166 events) - [COUNTERS] PROGRAM TOTAL : 95.8408s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4545s - [COUNTERS] Fortran MEs ( 1 ) : 95.3863s for 8192 events => throughput is 8.59E+01 events/s + [COUNTERS] PROGRAM TOTAL : 96.1979s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4594s + [COUNTERS] Fortran MEs ( 1 ) : 95.7384s for 8192 events => throughput is 8.56E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435802E-006] fbridge_mode=0 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 95.5040s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4512s - [COUNTERS] Fortran MEs ( 1 ) : 95.0528s for 8192 events => throughput is 8.62E+01 events/s + [COUNTERS] PROGRAM TOTAL : 96.1938s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4572s + [COUNTERS] Fortran MEs ( 1 ) : 95.7366s for 8192 events => throughput is 8.56E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1050.5151s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1583s - [COUNTERS] Fortran MEs ( 1 ) : 1046.3568s for 90112 events => throughput is 8.61E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1056.1191s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1851s + [COUNTERS] Fortran MEs ( 1 ) : 1051.9341s for 90112 events => throughput is 8.57E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435831E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 216.0448s - [COUNTERS] Fortran Overhead ( 0 ) : 99.5423s - [COUNTERS] CudaCpp MEs ( 2 ) : 116.5025s for 8192 events => throughput is 7.03E+01 events/s + [COUNTERS] PROGRAM TOTAL : 221.2522s + [COUNTERS] Fortran Overhead ( 0 ) : 101.5022s + [COUNTERS] CudaCpp MEs ( 2 ) : 119.7500s for 8192 events => throughput is 6.84E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813953E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1395.0826s - [COUNTERS] Fortran Overhead ( 0 ) : 101.4573s - [COUNTERS] CudaCpp MEs ( 2 ) : 1293.6254s for 90112 events => throughput is 6.97E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1409.0435s + [COUNTERS] Fortran Overhead ( 0 ) : 99.0565s + [COUNTERS] CudaCpp MEs ( 2 ) : 1309.9869s for 90112 events => throughput is 6.88E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.294341e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.535302e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.275454e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.232167e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435827E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 107.3938s - [COUNTERS] Fortran Overhead ( 0 ) : 49.4703s - [COUNTERS] CudaCpp MEs ( 2 ) : 57.9235s for 8192 events => throughput is 1.41E+02 events/s + [COUNTERS] PROGRAM TOTAL : 107.7463s + [COUNTERS] Fortran Overhead ( 0 ) : 49.5074s + [COUNTERS] CudaCpp MEs ( 2 ) : 58.2390s for 8192 events => throughput is 1.41E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 689.6088s - [COUNTERS] Fortran Overhead ( 0 ) : 53.6676s - [COUNTERS] CudaCpp MEs ( 2 ) : 635.9412s for 90112 events => throughput is 1.42E+02 events/s + [COUNTERS] PROGRAM TOTAL : 695.6110s + [COUNTERS] Fortran Overhead ( 0 ) : 53.4125s + [COUNTERS] CudaCpp MEs ( 2 ) : 642.1984s for 90112 events => throughput is 1.40E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.663387e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.667754e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.670748e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.672792e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 50.5726s - [COUNTERS] Fortran Overhead ( 0 ) : 23.0971s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.4754s for 8192 events => throughput is 2.98E+02 events/s + [COUNTERS] PROGRAM TOTAL : 50.7441s + [COUNTERS] Fortran Overhead ( 0 ) : 23.3520s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.3921s for 8192 events => throughput is 2.99E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 326.9697s - [COUNTERS] Fortran Overhead ( 0 ) : 26.6301s - [COUNTERS] CudaCpp MEs ( 2 ) : 300.3396s for 90112 events => throughput is 3.00E+02 events/s + [COUNTERS] PROGRAM TOTAL : 331.0298s + [COUNTERS] Fortran Overhead ( 0 ) : 27.1582s + [COUNTERS] CudaCpp MEs ( 2 ) : 303.8716s for 90112 events => throughput is 2.97E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.612820e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.602735e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.630261e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.607119e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 44.4764s - [COUNTERS] Fortran Overhead ( 0 ) : 20.3120s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.1644s for 8192 events => throughput is 3.39E+02 events/s + [COUNTERS] PROGRAM TOTAL : 44.2409s + [COUNTERS] Fortran Overhead ( 0 ) : 20.3557s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.8852s for 8192 events => throughput is 3.43E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 289.1902s - [COUNTERS] Fortran Overhead ( 0 ) : 23.9124s - [COUNTERS] CudaCpp MEs ( 2 ) : 265.2778s for 90112 events => throughput is 3.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 289.3981s + [COUNTERS] Fortran Overhead ( 0 ) : 23.9732s + [COUNTERS] CudaCpp MEs ( 2 ) : 265.4249s for 90112 events => throughput is 3.40E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.088132e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.111160e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.127446e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.141844e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 45.6965s - [COUNTERS] Fortran Overhead ( 0 ) : 22.1825s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.5139s for 8192 events => throughput is 3.48E+02 events/s + [COUNTERS] PROGRAM TOTAL : 45.6199s + [COUNTERS] Fortran Overhead ( 0 ) : 22.4059s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.2139s for 8192 events => throughput is 3.53E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 283.5251s - [COUNTERS] Fortran Overhead ( 0 ) : 25.9112s - [COUNTERS] CudaCpp MEs ( 2 ) : 257.6139s for 90112 events => throughput is 3.50E+02 events/s + [COUNTERS] PROGRAM TOTAL : 283.6130s + [COUNTERS] Fortran Overhead ( 0 ) : 26.2046s + [COUNTERS] CudaCpp MEs ( 2 ) : 257.4085s for 90112 events => throughput is 3.50E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.741805e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.763228e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.777930e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.741992e+02 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435838E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 4.1875s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1069s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0806s for 8192 events => throughput is 7.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.1979s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1190s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0789s for 8192 events => throughput is 7.59E+03 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 18.7118s - [COUNTERS] Fortran Overhead ( 0 ) : 6.8168s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.8950s for 90112 events => throughput is 7.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 18.6565s + [COUNTERS] Fortran Overhead ( 0 ) : 6.7674s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8891s for 90112 events => throughput is 7.58E+03 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.523661e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.527117e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.283120e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.256112e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.266218e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.240392e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.591927e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.568765e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.251570e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.279873e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.476794e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.441727e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.262349e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.268118e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.252080e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.240204e+03 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 3ca211fa85..9a7b15ddba 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_23:51:54 +DATE: 2023-11-09_20:10:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435802E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 166 events) - [COUNTERS] PROGRAM TOTAL : 95.6648s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4545s - [COUNTERS] Fortran MEs ( 1 ) : 95.2103s for 8192 events => throughput is 8.60E+01 events/s + [COUNTERS] PROGRAM TOTAL : 95.6517s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4537s + [COUNTERS] Fortran MEs ( 1 ) : 95.1980s for 8192 events => throughput is 8.61E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435802E-006] fbridge_mode=0 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 95.3879s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4521s - [COUNTERS] Fortran MEs ( 1 ) : 94.9358s for 8192 events => throughput is 8.63E+01 events/s + [COUNTERS] PROGRAM TOTAL : 95.5775s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4538s + [COUNTERS] Fortran MEs ( 1 ) : 95.1237s for 8192 events => throughput is 8.61E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1051.3512s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1998s - [COUNTERS] Fortran MEs ( 1 ) : 1047.1514s for 90112 events => throughput is 8.61E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1055.1274s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1731s + [COUNTERS] Fortran MEs ( 1 ) : 1050.9543s for 90112 events => throughput is 8.57E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694768344939596E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 195.2840s - [COUNTERS] Fortran Overhead ( 0 ) : 89.6572s - [COUNTERS] CudaCpp MEs ( 2 ) : 105.6269s for 8192 events => throughput is 7.76E+01 events/s + [COUNTERS] PROGRAM TOTAL : 198.8691s + [COUNTERS] Fortran Overhead ( 0 ) : 90.2534s + [COUNTERS] CudaCpp MEs ( 2 ) : 108.6157s for 8192 events => throughput is 7.54E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361436150871156E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1253.2021s - [COUNTERS] Fortran Overhead ( 0 ) : 93.4786s - [COUNTERS] CudaCpp MEs ( 2 ) : 1159.7235s for 90112 events => throughput is 7.77E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1275.3669s + [COUNTERS] Fortran Overhead ( 0 ) : 93.9491s + [COUNTERS] CudaCpp MEs ( 2 ) : 1181.4178s for 90112 events => throughput is 7.63E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.188520e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.083570e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.207566e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.167448e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694765850750953E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 48.9590s - [COUNTERS] Fortran Overhead ( 0 ) : 23.2330s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.7260s for 8192 events => throughput is 3.18E+02 events/s + [COUNTERS] PROGRAM TOTAL : 49.8398s + [COUNTERS] Fortran Overhead ( 0 ) : 23.4099s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.4299s for 8192 events => throughput is 3.10E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361430669586527E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 312.4727s - [COUNTERS] Fortran Overhead ( 0 ) : 26.8498s - [COUNTERS] CudaCpp MEs ( 2 ) : 285.6229s for 90112 events => throughput is 3.15E+02 events/s + [COUNTERS] PROGRAM TOTAL : 320.3836s + [COUNTERS] Fortran Overhead ( 0 ) : 26.9904s + [COUNTERS] CudaCpp MEs ( 2 ) : 293.3932s for 90112 events => throughput is 3.07E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.595667e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.524011e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.615224e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.562557e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694764951124567E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 25.4046s - [COUNTERS] Fortran Overhead ( 0 ) : 11.8022s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.6023s for 8192 events => throughput is 6.02E+02 events/s + [COUNTERS] PROGRAM TOTAL : 25.3018s + [COUNTERS] Fortran Overhead ( 0 ) : 11.8221s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.4798s for 8192 events => throughput is 6.08E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361430425531218E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 164.6743s - [COUNTERS] Fortran Overhead ( 0 ) : 15.5764s - [COUNTERS] CudaCpp MEs ( 2 ) : 149.0979s for 90112 events => throughput is 6.04E+02 events/s + [COUNTERS] PROGRAM TOTAL : 161.8530s + [COUNTERS] Fortran Overhead ( 0 ) : 15.4501s + [COUNTERS] CudaCpp MEs ( 2 ) : 146.4028s for 90112 events => throughput is 6.16E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.233727e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.213869e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.144603e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.163477e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694764951124567E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 22.4388s - [COUNTERS] Fortran Overhead ( 0 ) : 10.5095s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.9293s for 8192 events => throughput is 6.87E+02 events/s + [COUNTERS] PROGRAM TOTAL : 22.2497s + [COUNTERS] Fortran Overhead ( 0 ) : 10.3581s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8916s for 8192 events => throughput is 6.89E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361430425531218E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 145.9227s - [COUNTERS] Fortran Overhead ( 0 ) : 13.9719s - [COUNTERS] CudaCpp MEs ( 2 ) : 131.9508s for 90112 events => throughput is 6.83E+02 events/s + [COUNTERS] PROGRAM TOTAL : 144.5243s + [COUNTERS] Fortran Overhead ( 0 ) : 14.0601s + [COUNTERS] CudaCpp MEs ( 2 ) : 130.4642s for 90112 events => throughput is 6.91E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.277686e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.261245e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.316223e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.179572e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694767957195604E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 22.8899s - [COUNTERS] Fortran Overhead ( 0 ) : 11.3435s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.5464s for 8192 events => throughput is 7.09E+02 events/s + [COUNTERS] PROGRAM TOTAL : 22.8272s + [COUNTERS] Fortran Overhead ( 0 ) : 11.2607s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.5665s for 8192 events => throughput is 7.08E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361435956349820E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 142.7065s - [COUNTERS] Fortran Overhead ( 0 ) : 14.9424s - [COUNTERS] CudaCpp MEs ( 2 ) : 127.7641s for 90112 events => throughput is 7.05E+02 events/s + [COUNTERS] PROGRAM TOTAL : 143.3402s + [COUNTERS] Fortran Overhead ( 0 ) : 14.9961s + [COUNTERS] CudaCpp MEs ( 2 ) : 128.3441s for 90112 events => throughput is 7.02E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.537880e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.537594e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.497574e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.456699e+02 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694770708195000E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 2.4801s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9879s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4922s for 8192 events => throughput is 1.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4571s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9676s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4895s for 8192 events => throughput is 1.67E+04 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361443477565659E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 11.0377s - [COUNTERS] Fortran Overhead ( 0 ) : 5.5836s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4541s for 90112 events => throughput is 1.65E+04 events/s + [COUNTERS] PROGRAM TOTAL : 11.0626s + [COUNTERS] Fortran Overhead ( 0 ) : 5.6077s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4549s for 90112 events => throughput is 1.65E+04 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.639292e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.640892e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.626171e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.619412e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.329585e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.340657e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.369301e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.426283e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.304460e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.326049e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.376586e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.360046e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.333260e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.341201e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.421151e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.441486e+03 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 2729351c42..e947131942 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-09_00:57:00 +DATE: 2023-11-09_21:16:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435802E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 166 events) - [COUNTERS] PROGRAM TOTAL : 95.3917s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4564s - [COUNTERS] Fortran MEs ( 1 ) : 94.9352s for 8192 events => throughput is 8.63E+01 events/s + [COUNTERS] PROGRAM TOTAL : 95.6107s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4599s + [COUNTERS] Fortran MEs ( 1 ) : 95.1508s for 8192 events => throughput is 8.61E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435802E-006] fbridge_mode=0 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 95.2404s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4495s - [COUNTERS] Fortran MEs ( 1 ) : 94.7909s for 8192 events => throughput is 8.64E+01 events/s + [COUNTERS] PROGRAM TOTAL : 95.5844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4540s + [COUNTERS] Fortran MEs ( 1 ) : 95.1304s for 8192 events => throughput is 8.61E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1049.6483s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1482s - [COUNTERS] Fortran MEs ( 1 ) : 1045.5001s for 90112 events => throughput is 8.62E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1052.2893s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1570s + [COUNTERS] Fortran MEs ( 1 ) : 1048.1323s for 90112 events => throughput is 8.60E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101016896846E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 223.2377s - [COUNTERS] Fortran Overhead ( 0 ) : 102.8564s - [COUNTERS] CudaCpp MEs ( 2 ) : 120.3813s for 8192 events => throughput is 6.81E+01 events/s + [COUNTERS] PROGRAM TOTAL : 223.0748s + [COUNTERS] Fortran Overhead ( 0 ) : 103.3973s + [COUNTERS] CudaCpp MEs ( 2 ) : 119.6775s for 8192 events => throughput is 6.85E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436275882778E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1425.5713s - [COUNTERS] Fortran Overhead ( 0 ) : 106.5194s - [COUNTERS] CudaCpp MEs ( 2 ) : 1319.0519s for 90112 events => throughput is 6.83E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1425.4469s + [COUNTERS] Fortran Overhead ( 0 ) : 107.1167s + [COUNTERS] CudaCpp MEs ( 2 ) : 1318.3302s for 90112 events => throughput is 6.84E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.033155e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.990567e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.028364e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.033316e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101020910778E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 110.1179s - [COUNTERS] Fortran Overhead ( 0 ) : 50.7873s - [COUNTERS] CudaCpp MEs ( 2 ) : 59.3305s for 8192 events => throughput is 1.38E+02 events/s + [COUNTERS] PROGRAM TOTAL : 112.1583s + [COUNTERS] Fortran Overhead ( 0 ) : 51.1368s + [COUNTERS] CudaCpp MEs ( 2 ) : 61.0216s for 8192 events => throughput is 1.34E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436284111598E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 704.2691s - [COUNTERS] Fortran Overhead ( 0 ) : 54.2949s - [COUNTERS] CudaCpp MEs ( 2 ) : 649.9742s for 90112 events => throughput is 1.39E+02 events/s + [COUNTERS] PROGRAM TOTAL : 719.1467s + [COUNTERS] Fortran Overhead ( 0 ) : 54.6964s + [COUNTERS] CudaCpp MEs ( 2 ) : 664.4503s for 90112 events => throughput is 1.36E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.635297e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.625730e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.628042e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.622146e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 48.2204s - [COUNTERS] Fortran Overhead ( 0 ) : 21.9374s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.2831s for 8192 events => throughput is 3.12E+02 events/s + [COUNTERS] PROGRAM TOTAL : 48.7268s + [COUNTERS] Fortran Overhead ( 0 ) : 22.2016s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.5252s for 8192 events => throughput is 3.09E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 314.3646s - [COUNTERS] Fortran Overhead ( 0 ) : 26.1162s - [COUNTERS] CudaCpp MEs ( 2 ) : 288.2484s for 90112 events => throughput is 3.13E+02 events/s + [COUNTERS] PROGRAM TOTAL : 312.7787s + [COUNTERS] Fortran Overhead ( 0 ) : 25.8939s + [COUNTERS] CudaCpp MEs ( 2 ) : 286.8848s for 90112 events => throughput is 3.14E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.810528e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.761983e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.825565e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.775859e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 42.6054s - [COUNTERS] Fortran Overhead ( 0 ) : 19.4149s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.1905s for 8192 events => throughput is 3.53E+02 events/s + [COUNTERS] PROGRAM TOTAL : 42.1739s + [COUNTERS] Fortran Overhead ( 0 ) : 19.2356s + [COUNTERS] CudaCpp MEs ( 2 ) : 22.9383s for 8192 events => throughput is 3.57E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 278.0352s - [COUNTERS] Fortran Overhead ( 0 ) : 23.0285s - [COUNTERS] CudaCpp MEs ( 2 ) : 255.0067s for 90112 events => throughput is 3.53E+02 events/s + [COUNTERS] PROGRAM TOTAL : 277.3137s + [COUNTERS] Fortran Overhead ( 0 ) : 23.0478s + [COUNTERS] CudaCpp MEs ( 2 ) : 254.2659s for 90112 events => throughput is 3.54E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.372569e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.346725e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.390556e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.360141e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 44.8365s - [COUNTERS] Fortran Overhead ( 0 ) : 21.9299s - [COUNTERS] CudaCpp MEs ( 2 ) : 22.9066s for 8192 events => throughput is 3.58E+02 events/s + [COUNTERS] PROGRAM TOTAL : 45.3760s + [COUNTERS] Fortran Overhead ( 0 ) : 21.9554s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.4206s for 8192 events => throughput is 3.50E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 280.1799s - [COUNTERS] Fortran Overhead ( 0 ) : 25.4637s - [COUNTERS] CudaCpp MEs ( 2 ) : 254.7162s for 90112 events => throughput is 3.54E+02 events/s + [COUNTERS] PROGRAM TOTAL : 283.3743s + [COUNTERS] Fortran Overhead ( 0 ) : 25.7277s + [COUNTERS] CudaCpp MEs ( 2 ) : 257.6465s for 90112 events => throughput is 3.50E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.829822e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.787133e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.840554e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.796022e+02 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100942770687E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 3.5385s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6761s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8624s for 8192 events => throughput is 9.50E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.5891s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7218s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8672s for 8192 events => throughput is 9.45E+03 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436157495368E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 15.7972s - [COUNTERS] Fortran Overhead ( 0 ) : 6.3222s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.4751s for 90112 events => throughput is 9.51E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.8181s + [COUNTERS] Fortran Overhead ( 0 ) : 6.3338s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4843s for 90112 events => throughput is 9.50E+03 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.416746e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.489325e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.082101e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.086868e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.111361e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.112402e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.159067e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.163573e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.107992e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.112546e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.110248e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.110187e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.116277e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.113455e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.631653e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.651684e+03 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index a53e3fae12..17d6db3749 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' - - make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0'make[1]: Nothing to be done for 'all'. - +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_22:24:05 +DATE: 2023-11-09_18:42:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3033s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2340s - [COUNTERS] Fortran MEs ( 1 ) : 0.0693s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3065s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2361s + [COUNTERS] Fortran MEs ( 1 ) : 0.0704s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3022s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2323s - [COUNTERS] Fortran MEs ( 1 ) : 0.0700s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2994s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2293s + [COUNTERS] Fortran MEs ( 1 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.1700s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4093s - [COUNTERS] Fortran MEs ( 1 ) : 0.7607s for 90112 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1760s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4125s + [COUNTERS] Fortran MEs ( 1 ) : 0.7635s for 90112 events => throughput is 1.18E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3843s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3087s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0756s for 8192 events => throughput is 1.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3841s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3081s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0760s for 8192 events => throughput is 1.08E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.3194s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4974s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8220s for 90112 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3472s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5183s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8289s for 90112 events => throughput is 1.09E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.094809e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.089572e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.102064e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.081996e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,8 +210,8 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3132s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2728s + [COUNTERS] PROGRAM TOTAL : 0.3165s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2761s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615872] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.9124s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4682s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4441s for 90112 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9307s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4770s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4537s for 90112 events => throughput is 1.99E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.028339e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.997353e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.046734e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.027039e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2792s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2558s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0234s for 8192 events => throughput is 3.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2805s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2572s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 8192 events => throughput is 3.52E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6989s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4448s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2540s for 90112 events => throughput is 3.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7189s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4610s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2579s for 90112 events => throughput is 3.49E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.552356e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.495576e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.523608e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.465419e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2748s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2536s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0212s for 8192 events => throughput is 3.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2772s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2561s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.89E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6795s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4499s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2296s for 90112 events => throughput is 3.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6775s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4482s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2293s for 90112 events => throughput is 3.93E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.842884e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.760921e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.986906e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.978083e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2949s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2643s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0307s for 8192 events => throughput is 2.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2977s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2665s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0311s for 8192 events => throughput is 2.63E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8124s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4679s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3444s for 90112 events => throughput is 2.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8099s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4686s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3413s for 90112 events => throughput is 2.64E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.637628e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.568787e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.616200e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.561174e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6561s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6555s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.21E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6636s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6629s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.19E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,8 +547,8 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615869] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8543s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8466s + [COUNTERS] PROGRAM TOTAL : 1.8698s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8622s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.18E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.567103e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.555687e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.093360e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.006338e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.536245e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.515172e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.495821e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.526258e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.517486e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.533570e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.749421e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.783496e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.528020e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.532375e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.773747e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.774257e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 8d2e1984e4..a15824491a 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -3,9 +3,9 @@ CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_22:24:34 +DATE: 2023-11-09_18:42:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3107s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2407s - [COUNTERS] Fortran MEs ( 1 ) : 0.0700s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3036s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2341s + [COUNTERS] Fortran MEs ( 1 ) : 0.0695s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2965s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2276s - [COUNTERS] Fortran MEs ( 1 ) : 0.0689s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3003s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2303s + [COUNTERS] Fortran MEs ( 1 ) : 0.0699s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.1583s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4006s - [COUNTERS] Fortran MEs ( 1 ) : 0.7577s for 90112 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2069s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4340s + [COUNTERS] Fortran MEs ( 1 ) : 0.7729s for 90112 events => throughput is 1.17E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050316058770007] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3794s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3064s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0730s for 8192 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3749s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3033s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0716s for 8192 events => throughput is 1.14E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182797520666] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.5714s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7649s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8065s for 90112 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2764s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4961s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7803s for 90112 events => throughput is 1.15E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.157942e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.160144e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.172513e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.172915e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313133963987] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2818s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2568s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0250s for 8192 events => throughput is 3.28E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2845s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2592s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0253s for 8192 events => throughput is 3.23E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179276862181] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7271s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4513s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2758s for 90112 events => throughput is 3.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7355s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4565s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2790s for 90112 events => throughput is 3.23E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.237957e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.194415e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.272249e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.097783e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313344346482] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2581s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2459s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0121s for 8192 events => throughput is 6.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2583s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2455s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0128s for 8192 events => throughput is 6.41E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179137376883] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.5730s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4380s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1350s for 90112 events => throughput is 6.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5855s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4486s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1369s for 90112 events => throughput is 6.58E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.530818e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.397086e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.313362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.385448e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313344346482] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2561s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2447s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0114s for 8192 events => throughput is 7.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2587s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2472s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0114s for 8192 events => throughput is 7.16E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179137376883] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.5576s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4335s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1240s for 90112 events => throughput is 7.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5778s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4506s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1272s for 90112 events => throughput is 7.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.360354e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.864944e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.523552e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.826763e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050317064561834] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2804s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2629s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0176s for 8192 events => throughput is 4.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2685s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2527s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0158s for 8192 events => throughput is 5.17E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182143140752] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6820s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4993s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1827s for 90112 events => throughput is 4.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6231s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4511s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1720s for 90112 events => throughput is 5.24E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.733153e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.932364e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.992885e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.764394e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050319131407651] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6547s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6542s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.60E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6586s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6581s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.57E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801186038252196] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8561s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8501s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 90112 events => throughput is 1.53E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9395s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9332s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.43E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.584146e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.830948e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.491850e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.471030e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.856033e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.130497e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.715106e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.724199e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.884678e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.113825e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.799322e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.756435e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.441795e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.594258e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.896004e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.959495e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 19ad35f402..3468beddc5 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-08_22:25:01 +DATE: 2023-11-09_18:42:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3005s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2310s - [COUNTERS] Fortran MEs ( 1 ) : 0.0695s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3047s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2346s + [COUNTERS] Fortran MEs ( 1 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3006s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2306s - [COUNTERS] Fortran MEs ( 1 ) : 0.0700s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3065s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2359s + [COUNTERS] Fortran MEs ( 1 ) : 0.0705s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.1678s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4091s - [COUNTERS] Fortran MEs ( 1 ) : 0.7587s for 90112 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2175s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4409s + [COUNTERS] Fortran MEs ( 1 ) : 0.7766s for 90112 events => throughput is 1.16E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333282657206] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3817s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3071s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0746s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3854s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3097s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0757s for 8192 events => throughput is 1.08E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182636608796] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.3333s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5088s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8245s for 90112 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3546s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5224s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8322s for 90112 events => throughput is 1.08E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.076052e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.083780e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.093800e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.087409e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333282657201] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3088s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2701s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0388s for 8192 events => throughput is 2.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3153s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2754s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0399s for 8192 events => throughput is 2.05E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182636608810] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8975s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4677s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4298s for 90112 events => throughput is 2.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9817s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5338s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4479s for 90112 events => throughput is 2.01E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.015345e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.021169e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.988560e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.048865e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2819s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2586s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 8192 events => throughput is 3.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2833s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2602s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 8192 events => throughput is 3.54E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7744s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5086s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2658s for 90112 events => throughput is 3.39E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7176s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4613s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2564s for 90112 events => throughput is 3.52E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.485253e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.495609e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.534003e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.519650e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2862s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2651s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2747s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2542s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0205s for 8192 events => throughput is 3.99E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6779s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4535s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2245s for 90112 events => throughput is 4.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6910s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4668s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2243s for 90112 events => throughput is 4.02E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.974625e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.857183e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.057698e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.991341e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2985s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2664s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0320s for 8192 events => throughput is 2.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3063s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2736s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0327s for 8192 events => throughput is 2.50E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8085s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4572s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3513s for 90112 events => throughput is 2.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8379s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4792s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3586s for 90112 events => throughput is 2.51E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.330681e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.546786e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.533534e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.503592e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333301029693] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6572s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6565s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.23E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6613s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6607s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.22E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182637219935] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8718s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8641s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 90112 events => throughput is 1.17E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.8739s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8663s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.19E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.553454e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.582711e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.988956e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.041620e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.533250e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.534455e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.514727e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.524256e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.523754e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.513154e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.800142e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.797491e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.530148e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.528865e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.776434e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.779970e+07 ) sec^-1 TEST COMPLETED