From e0bb7db391f6babd7b575779905c1c2e38bf7bc7 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 2 Sep 2024 11:19:43 +0200 Subject: [PATCH] [june24] ** COMPLETE JUNE24 (AGAIN) ** rerun 30 tmad tests on itscrd90 on june24 branch - everything ok STARTED AT Mon Sep 2 06:58:36 AM CEST 2024 (SM tests) ENDED(1) AT Mon Sep 2 11:07:02 AM CEST 2024 [Status=0] (BSM tests) ENDED(1) AT Mon Sep 2 11:17:21 AM CEST 2024 [Status=0] 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt 1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt --- .../log_eemumu_mad_d_inl0_hrd0.txt | 136 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0.txt | 134 +++++++-------- .../log_eemumu_mad_m_inl0_hrd0.txt | 138 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0.txt | 134 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0.txt | 130 +++++++------- .../log_ggtt_mad_m_inl0_hrd0.txt | 140 +++++++-------- .../log_ggttg_mad_d_inl0_hrd0.txt | 142 ++++++++-------- .../log_ggttg_mad_f_inl0_hrd0.txt | 138 +++++++-------- .../log_ggttg_mad_m_inl0_hrd0.txt | 138 +++++++-------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 148 ++++++++-------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 150 ++++++++-------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 146 ++++++++-------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 160 +++++++++--------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 158 ++++++++--------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 156 ++++++++--------- .../log_gqttq_mad_d_inl0_hrd0.txt | 140 +++++++-------- .../log_gqttq_mad_f_inl0_hrd0.txt | 138 +++++++-------- .../log_gqttq_mad_m_inl0_hrd0.txt | 140 +++++++-------- .../log_heftggbb_mad_d_inl0_hrd0.txt | 140 +++++++-------- .../log_heftggbb_mad_f_inl0_hrd0.txt | 30 ++-- .../log_heftggbb_mad_m_inl0_hrd0.txt | 140 +++++++-------- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 158 ++++++++--------- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 152 ++++++++--------- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 144 ++++++++-------- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 138 +++++++-------- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 134 +++++++-------- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 132 +++++++-------- .../log_susyggtt_mad_d_inl0_hrd0.txt | 130 +++++++------- .../log_susyggtt_mad_f_inl0_hrd0.txt | 130 +++++++------- .../log_susyggtt_mad_m_inl0_hrd0.txt | 144 ++++++++-------- 30 files changed, 2069 insertions(+), 2069 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index daecd2c094..7e3c901212 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,22 +1,22 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum + make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-21_23:21:29 +DATE: 2024-09-02_06:58:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.6967s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6893s - [COUNTERS] Fortran MEs ( 1 ) : 0.0074s for 8192 events => throughput is 1.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6900s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6821s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1740s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1662s - [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1766s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1688s + [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.06E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3694s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2869s - [COUNTERS] Fortran MEs ( 1 ) : 0.0826s for 90112 events => throughput is 1.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3754s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2924s + [COUNTERS] Fortran MEs ( 1 ) : 0.0830s for 90112 events => throughput is 1.09E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952392424320E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1776s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1702s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0071s for 8192 events => throughput is 1.16E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1770s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1695s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.14E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552612798370556E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3688s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2914s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0771s for 90112 events => throughput is 1.17E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3701s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2924s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0774s for 90112 events => throughput is 1.16E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.156520e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.163883e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.175361e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.178699e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1754s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1705s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.83E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.1755s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1709s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0043s for 8192 events => throughput is 1.92E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552612798370556E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3344s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2879s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0462s for 90112 events => throughput is 1.95E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3392s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2924s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0465s for 90112 events => throughput is 1.94E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.926932e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.935692e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.972682e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.000716e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952392424334E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1730s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1693s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.45E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1752s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1714s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.36E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3301s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2941s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0356s for 90112 events => throughput is 2.53E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3264s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2914s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0347s for 90112 events => throughput is 2.59E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.534029e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.591750e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.718709e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.740843e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,8 +374,8 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952392424334E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1742s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1706s + [COUNTERS] PROGRAM TOTAL : 0.1741s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1705s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.50E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3323s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2981s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0338s for 90112 events => throughput is 2.66E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3304s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2956s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0344s for 90112 events => throughput is 2.62E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.675163e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.676787e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.769101e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.817332e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952392424334E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1729s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1686s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1759s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1713s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.98E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3337s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2916s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0417s for 90112 events => throughput is 2.16E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3364s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2951s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0410s for 90112 events => throughput is 2.20E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.080383e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.088508e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.174454e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.205867e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.6032s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5997s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.85E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6094s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6059s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.87E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7563s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7478s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 90112 events => throughput is 1.15E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7304s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7222s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.19E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.292897e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.361738e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.688587e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.715459e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.531836e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.560694e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.913745e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.907283e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.521901e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.551933e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.905057e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.903581e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.526656e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.557040e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.185541e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.178676e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 5a284b3bb2..10a885fc0b 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -3,8 +3,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/e make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-21_23:21:46 +DATE: 2024-09-02_06:59:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.6860s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6783s - [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7030s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6952s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1752s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1677s - [COUNTERS] Fortran MEs ( 1 ) : 0.0074s for 8192 events => throughput is 1.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1803s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1724s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3725s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2891s - [COUNTERS] Fortran MEs ( 1 ) : 0.0834s for 90112 events => throughput is 1.08E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3742s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2910s + [COUNTERS] Fortran MEs ( 1 ) : 0.0832s for 90112 events => throughput is 1.08E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673940164823388E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1718s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1647s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0068s for 8192 events => throughput is 1.20E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1742s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1669s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0071s for 8192 events => throughput is 1.16E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552600830551153E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3581s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2846s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0733s for 90112 events => throughput is 1.23E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3620s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2884s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0734s for 90112 events => throughput is 1.23E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.213993e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.224592e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.231490e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.232559e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673937587540376E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1714s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1683s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.84E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1700s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1671s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.06E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552598352995826E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3146s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2858s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0286s for 90112 events => throughput is 3.15E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3198s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2901s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0295s for 90112 events => throughput is 3.06E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.102860e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.112816e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.301247e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.235322e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673937665039383E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1687s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1660s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1723s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1695s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.25E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552598384447972E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3161s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2893s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0266s for 90112 events => throughput is 3.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3258s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2983s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0272s for 90112 events => throughput is 3.31E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.421541e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.502247e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.575027e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.548669e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673937665039383E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1748s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1720s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1707s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1680s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.36E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552598384447972E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3191s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2925s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0263s for 90112 events => throughput is 3.42E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.3196s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2931s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0262s for 90112 events => throughput is 3.44E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.471070e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.450165e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.660300e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.604912e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673941354609866E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1701s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1674s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.47E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1726s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1695s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 2.98E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552602074172512E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3200s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2929s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 90112 events => throughput is 3.36E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3224s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2944s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0276s for 90112 events => throughput is 3.26E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.470369e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.396726e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.517383e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.725203e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673942834136617E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.6017s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5984s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.91E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6072s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6038s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.92E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552603722717646E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7272s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7192s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0074s for 90112 events => throughput is 1.22E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7330s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7250s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 90112 events => throughput is 1.20E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.549092e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.411011e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.631236e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.706794e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.819075e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.799155e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.234401e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.248727e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.780965e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.819637e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.267656e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.234067e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.364732e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.364267e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.895706e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.864212e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 96c3ce60f2..84dcc3d3fe 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -2,13 +2,14 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/e make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' @@ -16,7 +17,6 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-21_23:22:01 +DATE: 2024-09-02_06:59:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.6850s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6775s - [COUNTERS] Fortran MEs ( 1 ) : 0.0074s for 8192 events => throughput is 1.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6937s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6858s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1777s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1699s - [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1770s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1689s + [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3709s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2887s - [COUNTERS] Fortran MEs ( 1 ) : 0.0821s for 90112 events => throughput is 1.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3754s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2919s + [COUNTERS] Fortran MEs ( 1 ) : 0.0835s for 90112 events => throughput is 1.08E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952389194196E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1776s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1699s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0074s for 8192 events => throughput is 1.11E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.1788s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1711s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552612788773982E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3753s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2963s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0787s for 90112 events => throughput is 1.15E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3745s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2946s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0795s for 90112 events => throughput is 1.13E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.139138e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.133071e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.154121e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.138322e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952389194196E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1741s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1695s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0043s for 8192 events => throughput is 1.91E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1750s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1702s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.84E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552612788773982E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3336s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2887s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0446s for 90112 events => throughput is 2.02E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3355s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2900s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0452s for 90112 events => throughput is 1.99E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.991362e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.003701e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.089961e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.055731e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952388695372E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1716s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1680s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1752s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1715s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.42E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552612782299689E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3291s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2932s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0356s for 90112 events => throughput is 2.53E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3299s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2945s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0350s for 90112 events => throughput is 2.57E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.597451e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.523311e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.673102e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.683822e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952388695372E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1753s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1718s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.62E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.1728s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1694s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552612782299689E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3327s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2975s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0349s for 90112 events => throughput is 2.58E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3292s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2947s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0342s for 90112 events => throughput is 2.63E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.603419e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.635646e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.725562e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.742968e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952388695372E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1743s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1698s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.99E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1748s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1708s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => throughput is 2.27E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552612782299689E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3345s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2949s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0393s for 90112 events => throughput is 2.29E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3470s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3038s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 90112 events => throughput is 2.11E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.166467e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.151910e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.338327e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.357675e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,8 +534,8 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09367 [9.3673952381938416E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.6018s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5983s + [COUNTERS] PROGRAM TOTAL : 0.6042s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6007s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.89E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09155 [9.1552612789338281E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7437s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7353s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 90112 events => throughput is 1.16E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7482s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7397s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 90112 events => throughput is 1.15E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.272692e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.329474e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.763158e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.842901e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.531744e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.560810e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.899178e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.923949e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.370218e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.533688e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.855970e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.908979e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.547442e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.568820e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.170954e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.185626e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index b0114d5758..239e71b4af 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,12 +1,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx + make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - - make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-21_23:22:17 +DATE: 2024-09-02_06:59:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.7737s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7331s - [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7930s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7514s + [COUNTERS] Fortran MEs ( 1 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=0 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4005s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3601s - [COUNTERS] Fortran MEs ( 1 ) : 0.0403s for 8192 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4039s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3625s + [COUNTERS] Fortran MEs ( 1 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156031786487532] fbridge_mode=0 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.6712s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2200s - [COUNTERS] Fortran MEs ( 1 ) : 0.4512s for 90112 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6990s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2403s + [COUNTERS] Fortran MEs ( 1 ) : 0.4587s for 90112 events => throughput is 1.96E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4033s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3595s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4109s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3670s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156031786487532] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.7381s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2634s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4742s for 90112 events => throughput is 1.90E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.7708s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2860s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4843s for 90112 events => throughput is 1.86E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.862738e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.892524e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.868889e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.887982e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3888s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3648s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0236s for 8192 events => throughput is 3.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3880s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3634s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0242s for 8192 events => throughput is 3.38E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156031786487532] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.5417s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2732s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2681s for 90112 events => throughput is 3.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5501s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2794s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2703s for 90112 events => throughput is 3.33E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.313561e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.217665e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.313013e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.288412e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190399993877953] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3790s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3633s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.38E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3812s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3654s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0153s for 8192 events => throughput is 5.34E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156031786487524] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.4320s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2636s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1680s for 90112 events => throughput is 5.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4466s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2776s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1686s for 90112 events => throughput is 5.34E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.262826e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.321385e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.374834e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.314700e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190399993877953] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3751s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3607s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3809s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3665s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0140s for 8192 events => throughput is 5.86E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156031786487524] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.4316s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2798s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1514s for 90112 events => throughput is 5.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4364s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2815s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1544s for 90112 events => throughput is 5.84E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.702530e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.771457e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.901849e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.864464e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,8 +454,8 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190399993877953] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3834s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3616s + [COUNTERS] PROGRAM TOTAL : 0.3881s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3663s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.84E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s @@ -489,9 +489,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156031786487524] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.4896s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2568s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2323s for 90112 events => throughput is 3.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5242s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2866s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2371s for 90112 events => throughput is 3.80E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.628731e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.611021e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.697546e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.697198e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190399993877953] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.7972s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7935s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.86E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8042s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8004s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.80E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156031786487532] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.7034s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6933s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 90112 events => throughput is 9.71E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.7233s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7134s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 90112 events => throughput is 9.89E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.145019e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.088329e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.420127e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.585817e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.903378e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.921296e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.708186e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.642394e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.903438e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.901405e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.815730e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.897860e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.891168e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.892353e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.716990e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.692360e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 5bec457add..a10a26ba3e 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -5,8 +5,8 @@ make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-21_23:22:43 +DATE: 2024-09-02_07:00:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.7830s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7417s - [COUNTERS] Fortran MEs ( 1 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7848s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7437s + [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=0 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4008s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3603s - [COUNTERS] Fortran MEs ( 1 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4070s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3656s + [COUNTERS] Fortran MEs ( 1 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156031786487532] fbridge_mode=0 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.6651s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2180s - [COUNTERS] Fortran MEs ( 1 ) : 0.4471s for 90112 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6931s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2377s + [COUNTERS] Fortran MEs ( 1 ) : 0.4554s for 90112 events => throughput is 1.98E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190394119386738] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4000s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3587s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4079s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3662s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156028163566589] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.7067s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2578s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4485s for 90112 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7285s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2748s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4533s for 90112 events => throughput is 1.99E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.030791e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.999199e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.018545e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.978384e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190390126085290] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3749s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3578s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0169s for 8192 events => throughput is 4.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3790s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3617s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.80E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156024287692041] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.4396s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2558s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1834s for 90112 events => throughput is 4.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4751s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2892s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1856s for 90112 events => throughput is 4.86E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.745031e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.722011e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.740555e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.787563e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190390514620915] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3675s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3585s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 8192 events => throughput is 9.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3761s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3668s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 8192 events => throughput is 9.11E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156022338885265] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.3593s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2629s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0961s for 90112 events => throughput is 9.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3789s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2802s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0984s for 90112 events => throughput is 9.16E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.314080e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.087831e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.260538e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.255190e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190390514620915] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3676s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3590s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 9.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3712s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3621s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0088s for 8192 events => throughput is 9.32E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156022338885265] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.3552s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2643s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0906s for 90112 events => throughput is 9.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3663s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2738s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0922s for 90112 events => throughput is 9.77E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.807528e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.782240e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.824773e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.861091e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190394861161103] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3755s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3636s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0115s for 8192 events => throughput is 7.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3741s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3621s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0116s for 8192 events => throughput is 7.08E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156026777408194] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.3813s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2567s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1242s for 90112 events => throughput is 7.26E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4135s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2849s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1282s for 90112 events => throughput is 7.03E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.802954e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.828517e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.242807e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.977153e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190400428492907] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.7963s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7928s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.80E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8297s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8261s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.75E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156031802494475] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.7035s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6947s + [COUNTERS] PROGRAM TOTAL : 1.7211s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7122s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 90112 events => throughput is 1.10E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.320560e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.368064e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.681991e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.622906e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.939579e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.123333e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.369535e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.384526e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.929730e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.086847e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.387560e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.396981e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.692994e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.764264e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.052520e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.103142e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index c865bf6334..7510df5e12 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,13 +1,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 - make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -18,10 +18,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-21_23:23:08 +DATE: 2024-09-02_07:00:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.7835s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7423s - [COUNTERS] Fortran MEs ( 1 ) : 0.0412s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7873s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7456s + [COUNTERS] Fortran MEs ( 1 ) : 0.0417s for 8192 events => throughput is 1.96E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=0 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3977s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3580s - [COUNTERS] Fortran MEs ( 1 ) : 0.0397s for 8192 events => throughput is 2.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4126s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3694s + [COUNTERS] Fortran MEs ( 1 ) : 0.0431s for 8192 events => throughput is 1.90E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156031786487532] fbridge_mode=0 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.6693s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2202s - [COUNTERS] Fortran MEs ( 1 ) : 0.4491s for 90112 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7506s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2796s + [COUNTERS] Fortran MEs ( 1 ) : 0.4710s for 90112 events => throughput is 1.91E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190401334262738] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4118s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3674s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0439s for 8192 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4088s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3623s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0460s for 8192 events => throughput is 1.78E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156033127698386] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.7494s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2656s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4833s for 90112 events => throughput is 1.86E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.7707s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2834s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4869s for 90112 events => throughput is 1.85E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.886135e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.857679e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.870290e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.861040e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190401334262724] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3979s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3724s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0252s for 8192 events => throughput is 3.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3905s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3659s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0242s for 8192 events => throughput is 3.39E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156033127698386] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.6689s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3839s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2845s for 90112 events => throughput is 3.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5519s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2846s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2669s for 90112 events => throughput is 3.38E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.333157e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.370119e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.262953e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.431477e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190401377780539] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4002s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3837s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 8192 events => throughput is 5.12E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.3801s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3646s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0151s for 8192 events => throughput is 5.43E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156033163835552] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.5138s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3371s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1763s for 90112 events => throughput is 5.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4492s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2811s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1676s for 90112 events => throughput is 5.38E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.326444e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.368052e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.413178e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.349021e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190401377780539] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3735s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3597s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0134s for 8192 events => throughput is 6.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3795s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3651s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0140s for 8192 events => throughput is 5.85E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156033163835552] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.4104s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2613s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1487s for 90112 events => throughput is 6.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4304s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2782s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1517s for 90112 events => throughput is 5.94E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.974222e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.945633e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.003657e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.030396e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190401377780539] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3819s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3616s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3879s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3664s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.89E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156033163835552] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.4866s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2541s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2320s for 90112 events => throughput is 3.88E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.5162s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2791s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2367s for 90112 events => throughput is 3.81E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.645050e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.720031e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.721943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.841945e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.19 [47.190399989386655] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.8086s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8048s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.70E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8041s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8003s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.78E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.16 [47.156031785698381] fbridge_mode=1 [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.7050s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6950s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 90112 events => throughput is 9.88E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.7240s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7140s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 90112 events => throughput is 9.81E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.257129e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.108490e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.443056e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.608485e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.916669e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.892798e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.643495e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.676934e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.757485e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.929262e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.018784e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.040199e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.883016e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.901697e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.725531e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.716849e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index e9ca264005..ab28660dbe 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,13 +1,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg + make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-21_23:23:34 +DATE: 2024-09-02_07:00:54 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.6765s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3561s - [COUNTERS] Fortran MEs ( 1 ) : 0.3204s for 8192 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6949s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3669s + [COUNTERS] Fortran MEs ( 1 ) : 0.3280s for 8192 events => throughput is 2.50E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.6348s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3158s - [COUNTERS] Fortran MEs ( 1 ) : 0.3190s for 8192 events => throughput is 2.57E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6474s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3210s + [COUNTERS] Fortran MEs ( 1 ) : 0.3264s for 8192 events => throughput is 2.51E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 5.0026s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4809s - [COUNTERS] Fortran MEs ( 1 ) : 3.5218s for 90112 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1361s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5293s + [COUNTERS] Fortran MEs ( 1 ) : 3.6069s for 90112 events => throughput is 2.50E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.6544s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3184s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3348s for 8192 events => throughput is 2.45E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 0.6604s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3209s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3383s for 8192 events => throughput is 2.42E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 5.1821s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5214s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6595s for 90112 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2721s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5362s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.7347s for 90112 events => throughput is 2.41E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.507558e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.488662e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.547676e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.503105e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806079395720] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.5005s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3258s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1740s for 8192 events => throughput is 4.71E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5003s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3222s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1773s for 8192 events => throughput is 4.62E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 3.4404s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5166s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9230s for 90112 events => throughput is 4.69E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.5120s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5415s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9697s for 90112 events => throughput is 4.57E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.760284e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.692831e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.818037e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.747820e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806079395723] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.4049s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3185s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0858s for 8192 events => throughput is 9.55E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.4121s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3245s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0869s for 8192 events => throughput is 9.42E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.5018s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5315s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9696s for 90112 events => throughput is 9.29E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.5004s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5305s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9692s for 90112 events => throughput is 9.30E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.496117e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.824726e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.659471e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.100533e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806079395723] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.3950s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3153s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0791s for 8192 events => throughput is 1.04E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.4010s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3220s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0784s for 8192 events => throughput is 1.05E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.3923s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5246s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8670s for 90112 events => throughput is 1.04E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 2.4306s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5505s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8795s for 90112 events => throughput is 1.02E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.025384e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.072680e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.032273e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.090132e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806079395724] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.4508s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3335s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1164s for 8192 events => throughput is 7.03E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [COUNTERS] PROGRAM TOTAL : 0.4311s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3203s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1100s for 8192 events => throughput is 7.45E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.7977s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5634s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2335s for 90112 events => throughput is 7.31E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7386s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5313s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2066s for 90112 events => throughput is 7.47E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.461868e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.473786e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.652849e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.507446e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.7658s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7535s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 8192 events => throughput is 8.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7683s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7560s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 8192 events => throughput is 8.86E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946447903217269E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 1.9812s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9510s + [COUNTERS] PROGRAM TOTAL : 2.0054s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9750s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0272s for 90112 events => throughput is 3.31E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.138759e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.151435e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.564735e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.583032e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.481528e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.502762e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.163059e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.161500e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.454061e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.495653e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.166559e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.178328e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.473246e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.500095e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.640891e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.644291e+06 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 17e9f17d1d..61406c8f7b 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -3,9 +3,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda - -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-21_23:24:15 +DATE: 2024-09-02_07:01:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.7128s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3767s - [COUNTERS] Fortran MEs ( 1 ) : 0.3361s for 8192 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6982s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3719s + [COUNTERS] Fortran MEs ( 1 ) : 0.3263s for 8192 events => throughput is 2.51E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.6616s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3241s - [COUNTERS] Fortran MEs ( 1 ) : 0.3375s for 8192 events => throughput is 2.43E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6424s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3173s + [COUNTERS] Fortran MEs ( 1 ) : 0.3251s for 8192 events => throughput is 2.52E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 5.0043s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4854s - [COUNTERS] Fortran MEs ( 1 ) : 3.5189s for 90112 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.0954s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5080s + [COUNTERS] Fortran MEs ( 1 ) : 3.5873s for 90112 events => throughput is 2.51E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562804416188390] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.6377s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3189s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3177s for 8192 events => throughput is 2.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6496s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3219s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3267s for 8192 events => throughput is 2.51E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946434556369253E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 5.0665s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5183s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5472s for 90112 events => throughput is 2.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1623s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5506s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6107s for 90112 events => throughput is 2.50E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.550863e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.574892e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.559205e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.576052e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562802510294199] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.4183s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3178s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1000s for 8192 events => throughput is 8.20E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.4218s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3212s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1001s for 8192 events => throughput is 8.19E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946421150520988E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.5899s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5081s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0813s for 90112 events => throughput is 8.33E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6339s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5347s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0986s for 90112 events => throughput is 8.20E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.459907e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.359747e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.487595e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.308905e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562802567355024] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.3603s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3147s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0452s for 8192 events => throughput is 1.81E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3690s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3224s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0461s for 8192 events => throughput is 1.78E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946420380865552E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.0104s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5115s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4984s for 90112 events => throughput is 1.81E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0358s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5340s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5014s for 90112 events => throughput is 1.80E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.685219e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.815131e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.818527e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.806385e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562802567355024] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.3597s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3185s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3708s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3279s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0424s for 8192 events => throughput is 1.93E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946420380865552E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 1.9605s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5088s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4512s for 90112 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9961s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5362s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4595s for 90112 events => throughput is 1.96E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.996828e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.027880e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.012125e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.996384e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562804211436801] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.3711s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3174s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0531s for 8192 events => throughput is 1.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3782s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3228s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0548s for 8192 events => throughput is 1.49E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946430964077192E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.0898s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5144s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5748s for 90112 events => throughput is 1.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1472s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5441s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6025s for 90112 events => throughput is 1.50E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.539215e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.510181e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.581223e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.524564e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562804625987131] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.7551s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7506s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.44E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7628s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7581s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.39E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946440545672862E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 1.9554s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9408s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0135s for 90112 events => throughput is 6.70E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9791s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9646s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 90112 events => throughput is 6.79E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.796399e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.796780e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.004982e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.966060e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.315412e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.318273e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.200957e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.228359e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.304813e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.340547e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.253188e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.291829e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.204910e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.222050e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.257284e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.261962e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 10937dc1fa..d4a16dbe06 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -4,8 +4,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-21_23:24:52 +DATE: 2024-09-02_07:02:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.6814s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3607s - [COUNTERS] Fortran MEs ( 1 ) : 0.3207s for 8192 events => throughput is 2.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6895s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3623s + [COUNTERS] Fortran MEs ( 1 ) : 0.3271s for 8192 events => throughput is 2.50E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.6411s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3220s - [COUNTERS] Fortran MEs ( 1 ) : 0.3192s for 8192 events => throughput is 2.57E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6436s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3209s + [COUNTERS] Fortran MEs ( 1 ) : 0.3227s for 8192 events => throughput is 2.54E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 5.0315s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4940s - [COUNTERS] Fortran MEs ( 1 ) : 3.5375s for 90112 events => throughput is 2.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1083s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5165s + [COUNTERS] Fortran MEs ( 1 ) : 3.5918s for 90112 events => throughput is 2.51E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806184450918] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.6699s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3236s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3452s for 8192 events => throughput is 2.37E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 0.6665s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3231s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3421s for 8192 events => throughput is 2.39E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946448664873659E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 5.2814s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5219s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.7583s for 90112 events => throughput is 2.40E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3338s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5463s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.7862s for 90112 events => throughput is 2.38E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.494235e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.459745e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.514100e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.468576e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806177750511] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.5093s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3245s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1840s for 8192 events => throughput is 4.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5005s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3230s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1768s for 8192 events => throughput is 4.63E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946448673477319E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 3.4139s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5132s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8998s for 90112 events => throughput is 4.74E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.5369s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5635s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9727s for 90112 events => throughput is 4.57E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.813034e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.701428e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.829750e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.794307e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806158303427] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.3996s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3137s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0852s for 8192 events => throughput is 9.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4128s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3237s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0884s for 8192 events => throughput is 9.27E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946448515023347E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.4604s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5166s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9431s for 90112 events => throughput is 9.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4996s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5337s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9653s for 90112 events => throughput is 9.34E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.744847e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.510598e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.734137e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.411016e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806158303427] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.3974s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3209s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0758s for 8192 events => throughput is 1.08E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.3985s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3199s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0780s for 8192 events => throughput is 1.05E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946448515023347E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.3495s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5091s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8397s for 90112 events => throughput is 1.07E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 2.3996s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5420s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8570s for 90112 events => throughput is 1.05E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.083861e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.081438e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.086982e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.093693e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806177389659] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.4541s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3299s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1234s for 8192 events => throughput is 6.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4322s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3195s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1119s for 8192 events => throughput is 7.32E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946448645092413E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.8259s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5507s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2743s for 90112 events => throughput is 7.07E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7734s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5330s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2396s for 90112 events => throughput is 7.27E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.329854e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.281646e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.409755e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.341397e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1056 [0.10562806076929508] fbridge_mode=1 [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.7637s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7514s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 8192 events => throughput is 8.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7648s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7525s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 8192 events => throughput is 8.85E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07995 [7.9946447910357057E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 1.9901s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9597s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0274s for 90112 events => throughput is 3.29E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s + [COUNTERS] PROGRAM TOTAL : 1.9880s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9577s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0273s for 90112 events => throughput is 3.30E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.140804e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.146368e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.592923e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.559076e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.426281e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.447733e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.159222e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.157373e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.422944e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.443954e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.170373e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.168889e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.438089e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.468540e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.627314e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.627191e+06 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 28e1b89acf..e1c25239e7 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -2,8 +2,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-21_23:25:33 +DATE: 2024-09-02_07:02:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 4.3916s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2504s - [COUNTERS] Fortran MEs ( 1 ) : 4.1412s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4653s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2554s + [COUNTERS] Fortran MEs ( 1 ) : 4.2100s for 8192 events => throughput is 1.95E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 4.3963s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2491s - [COUNTERS] Fortran MEs ( 1 ) : 4.1472s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4569s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2471s + [COUNTERS] Fortran MEs ( 1 ) : 4.2098s for 8192 events => throughput is 1.95E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154487411607687] fbridge_mode=0 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 47.5679s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7910s - [COUNTERS] Fortran MEs ( 1 ) : 45.7769s for 90112 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 48.3066s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8115s + [COUNTERS] Fortran MEs ( 1 ) : 46.4950s for 90112 events => throughput is 1.94E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063416981514666] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 4.5396s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2513s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2798s for 8192 events => throughput is 1.91E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s + [COUNTERS] PROGRAM TOTAL : 4.6394s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2529s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3779s for 8192 events => throughput is 1.87E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0087s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154487411607679] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 49.2859s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8217s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.4558s for 90112 events => throughput is 1.90E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s + [COUNTERS] PROGRAM TOTAL : 50.0665s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8392s + [COUNTERS] CudaCpp MEs ( 2 ) : 48.2181s for 90112 events => throughput is 1.87E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0092s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.960157e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.929856e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.963511e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.936527e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063416981514688] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 2.5260s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2484s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2730s for 8192 events => throughput is 3.60E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0046s + [COUNTERS] PROGRAM TOTAL : 2.5862s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2518s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3295s for 8192 events => throughput is 3.52E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0049s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154487411607687] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 27.1055s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8077s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.2930s for 90112 events => throughput is 3.56E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.5773s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8641s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.7084s for 90112 events => throughput is 3.51E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.689861e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.637176e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.680815e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.613295e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063416981514699] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 1.2438s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2537s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9875s for 8192 events => throughput is 8.30E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s + [COUNTERS] PROGRAM TOTAL : 1.2704s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2536s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0143s for 8192 events => throughput is 8.08E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154487411607679] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 12.7703s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8004s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.9675s for 90112 events => throughput is 8.22E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s + [COUNTERS] PROGRAM TOTAL : 13.1377s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8650s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.2701s for 90112 events => throughput is 8.00E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.915334e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.206026e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.437730e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.331692e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063416981514699] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 1.1432s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2553s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8855s for 8192 events => throughput is 9.25E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s + [COUNTERS] PROGRAM TOTAL : 1.1622s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2533s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9066s for 8192 events => throughput is 9.04E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154487411607679] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 11.6139s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8175s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.7941s for 90112 events => throughput is 9.20E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s + [COUNTERS] PROGRAM TOTAL : 11.7181s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8398s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.8761s for 90112 events => throughput is 9.12E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.083425e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.233002e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.137309e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.450469e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063416981514699] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 1.4435s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2656s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1748s for 8192 events => throughput is 6.97E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s + [COUNTERS] PROGRAM TOTAL : 1.3833s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2521s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1284s for 8192 events => throughput is 7.26E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154487411607679] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 14.1169s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8159s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.2979s for 90112 events => throughput is 7.33E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s + [COUNTERS] PROGRAM TOTAL : 14.3179s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8300s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.4849s for 90112 events => throughput is 7.22E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.420117e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.196841e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.396267e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.350514e+03 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,8 +534,8 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063416981514688] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 0.7556s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6820s + [COUNTERS] PROGRAM TOTAL : 0.7641s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6905s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0387s for 8192 events => throughput is 2.12E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0349s @@ -569,10 +569,10 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154487411607690] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 2.6505s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2461s + [COUNTERS] PROGRAM TOTAL : 2.6595s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2552s [COUNTERS] CudaCpp MEs ( 2 ) : 0.3695s for 90112 events => throughput is 2.44E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0349s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0347s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.144921e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.140664e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.350460e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.354973e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.133000e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.118962e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.158003e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168678e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.126708e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.128635e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.181777e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.173072e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.127323e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.126252e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.425233e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.428223e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 98cd036eed..a94ddeb453 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -3,23 +3,23 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-21_23:29:29 +DATE: 2024-09-02_07:06:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 4.4044s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2499s - [COUNTERS] Fortran MEs ( 1 ) : 4.1545s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4597s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2531s + [COUNTERS] Fortran MEs ( 1 ) : 4.2066s for 8192 events => throughput is 1.95E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 4.3930s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2487s - [COUNTERS] Fortran MEs ( 1 ) : 4.1443s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4703s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2495s + [COUNTERS] Fortran MEs ( 1 ) : 4.2209s for 8192 events => throughput is 1.94E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154487411607687] fbridge_mode=0 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 47.6072s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8001s - [COUNTERS] Fortran MEs ( 1 ) : 45.8071s for 90112 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 48.2714s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8151s + [COUNTERS] Fortran MEs ( 1 ) : 46.4563s for 90112 events => throughput is 1.94E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063782001975612] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 4.4437s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2484s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1867s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5088s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2538s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2465s for 8192 events => throughput is 1.93E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -170,10 +170,10 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154547896253576] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 47.8073s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7987s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.0000s for 90112 events => throughput is 1.96E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s + [COUNTERS] PROGRAM TOTAL : 48.6783s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8455s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.8243s for 90112 events => throughput is 1.92E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0084s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -186,12 +186,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.008295e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.984164e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.975086e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.972429e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -215,10 +215,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063772189507497] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 1.4105s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2517s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1562s for 8192 events => throughput is 7.09E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s + [COUNTERS] PROGRAM TOTAL : 1.4262s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2504s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1731s for 8192 events => throughput is 6.98E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -250,9 +250,9 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154543829232032] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 14.5090s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8076s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.6988s for 90112 events => throughput is 7.10E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.7697s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8367s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.9305s for 90112 events => throughput is 6.97E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -266,12 +266,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.254564e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.148962e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.282054e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.178836e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -295,10 +295,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063776840958710] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 0.7544s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2507s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5023s for 8192 events => throughput is 1.63E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + [COUNTERS] PROGRAM TOTAL : 0.7622s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2497s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5111s for 8192 events => throughput is 1.60E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -330,9 +330,9 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154546188078725] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 7.3776s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8033s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.5729s for 90112 events => throughput is 1.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.4857s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8369s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.6474s for 90112 events => throughput is 1.60E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -346,12 +346,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.672010e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.642140e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.669560e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.645647e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -375,10 +375,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063776840958710] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 0.7012s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2465s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4533s for 8192 events => throughput is 1.81E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s + [COUNTERS] PROGRAM TOTAL : 0.7061s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2524s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4523s for 8192 events => throughput is 1.81E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -410,10 +410,10 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154546188078725] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 6.8424s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8236s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.0174s for 90112 events => throughput is 1.80E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s + [COUNTERS] PROGRAM TOTAL : 6.8659s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8249s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.0396s for 90112 events => throughput is 1.79E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -426,12 +426,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.897416e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.858052e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.887439e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.875566e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -455,10 +455,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063796149473241] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 0.8125s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2558s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5550s for 8192 events => throughput is 1.48E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + [COUNTERS] PROGRAM TOTAL : 0.8115s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2523s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5576s for 8192 events => throughput is 1.47E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -490,10 +490,10 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154549699006573] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 7.8695s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8004s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.0673s for 90112 events => throughput is 1.49E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + [COUNTERS] PROGRAM TOTAL : 8.1202s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8433s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.2752s for 90112 events => throughput is 1.44E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -506,12 +506,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.508161e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.477080e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.509788e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.470763e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -535,8 +535,8 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063814953416677] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 0.7341s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6826s + [COUNTERS] PROGRAM TOTAL : 0.7381s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6865s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.04E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0246s @@ -570,9 +570,9 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154554638015539] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 2.4979s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2184s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2549s for 90112 events => throughput is 3.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5327s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2524s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2557s for 90112 events => throughput is 3.52E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0246s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -586,42 +586,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.103050e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.116091e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.405868e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.389662e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.104467e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.096626e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.230140e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.211336e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.125382e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.102050e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.224276e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.224185e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.086938e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.086520e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.392029e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.391380e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 8da744076b..a2933249ce 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -2,8 +2,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -18,9 +18,9 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-21_23:32:36 +DATE: 2024-09-02_07:10:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 4.4204s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2508s - [COUNTERS] Fortran MEs ( 1 ) : 4.1697s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4699s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2529s + [COUNTERS] Fortran MEs ( 1 ) : 4.2170s for 8192 events => throughput is 1.94E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 4.3959s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2472s - [COUNTERS] Fortran MEs ( 1 ) : 4.1488s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4662s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2467s + [COUNTERS] Fortran MEs ( 1 ) : 4.2195s for 8192 events => throughput is 1.94E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154487411607687] fbridge_mode=0 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 47.6293s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8051s - [COUNTERS] Fortran MEs ( 1 ) : 45.8241s for 90112 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 48.1948s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8028s + [COUNTERS] Fortran MEs ( 1 ) : 46.3919s for 90112 events => throughput is 1.94E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063417389679768] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 4.6218s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2544s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3588s for 8192 events => throughput is 1.88E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s + [COUNTERS] PROGRAM TOTAL : 4.6823s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2633s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.4100s for 8192 events => throughput is 1.86E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0090s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154487480444804] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 50.0743s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8184s - [COUNTERS] CudaCpp MEs ( 2 ) : 48.2472s for 90112 events => throughput is 1.87E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s + [COUNTERS] PROGRAM TOTAL : 50.6321s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8415s + [COUNTERS] CudaCpp MEs ( 2 ) : 48.7818s for 90112 events => throughput is 1.85E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0088s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.932780e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.912608e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.942839e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910493e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063417194462525] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 2.5436s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2486s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2902s for 8192 events => throughput is 3.58E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s + [COUNTERS] PROGRAM TOTAL : 2.5988s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2528s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3410s for 8192 events => throughput is 3.50E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154487462148848] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 27.1149s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8064s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.3031s for 90112 events => throughput is 3.56E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0053s + [COUNTERS] PROGRAM TOTAL : 27.5594s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8414s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.7133s for 90112 events => throughput is 3.50E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0047s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.637022e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.589262e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.660481e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.503515e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063417138500590] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 1.2540s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2533s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9984s for 8192 events => throughput is 8.21E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s + [COUNTERS] PROGRAM TOTAL : 1.2772s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2553s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0194s for 8192 events => throughput is 8.04E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154487463123707] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 12.8716s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8199s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.0492s for 90112 events => throughput is 8.16E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s + [COUNTERS] PROGRAM TOTAL : 13.0793s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8712s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.2057s for 90112 events => throughput is 8.04E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.087072e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.252678e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.330091e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.263765e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063417138500590] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 1.1320s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2525s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8774s for 8192 events => throughput is 9.34E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.1414s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2534s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8858s for 8192 events => throughput is 9.25E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154487463123707] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 11.4083s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7978s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.6083s for 90112 events => throughput is 9.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 11.5724s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8374s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.7328s for 90112 events => throughput is 9.26E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.748310e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.552325e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.712906e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.532682e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063417138500590] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 1.3794s + [COUNTERS] PROGRAM TOTAL : 1.3940s [COUNTERS] Fortran Overhead ( 0 ) : 0.2528s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1237s for 8192 events => throughput is 7.29E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1383s for 8192 events => throughput is 7.20E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154487463123707] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 14.1562s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8064s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.3468s for 90112 events => throughput is 7.30E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s + [COUNTERS] PROGRAM TOTAL : 14.3865s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8402s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.5435s for 90112 events => throughput is 7.18E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.143146e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.283591e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.318481e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.319158e+03 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.7806 [0.78063416917067197] fbridge_mode=1 [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 0.7765s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7034s + [COUNTERS] PROGRAM TOTAL : 0.7609s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6877s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0383s for 8192 events => throughput is 2.14E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0347s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.1915 [0.19154487447873400] fbridge_mode=1 [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 2.6312s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2256s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3709s for 90112 events => throughput is 2.43E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6544s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2492s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3705s for 90112 events => throughput is 2.43E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.165626e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.166594e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.359746e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.346522e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.132493e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.128597e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.164869e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.162506e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.136773e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.127115e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.166507e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.169117e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.120731e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.130360e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.419474e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.417587e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index c67135f096..0cf7735a4c 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,13 +1,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make USEBUILDDIR=1 BACKEND=cuda - +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -20,8 +20,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-21_23:37:58 +DATE: 2024-09-02_07:15:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 96.9734s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4850s - [COUNTERS] Fortran MEs ( 1 ) : 96.4885s for 8192 events => throughput is 8.49E+01 events/s + [COUNTERS] PROGRAM TOTAL : 98.0853s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4849s + [COUNTERS] Fortran MEs ( 1 ) : 97.6004s for 8192 events => throughput is 8.39E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 97.2728s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4754s - [COUNTERS] Fortran MEs ( 1 ) : 96.7974s for 8192 events => throughput is 8.46E+01 events/s + [COUNTERS] PROGRAM TOTAL : 98.4132s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5077s + [COUNTERS] Fortran MEs ( 1 ) : 97.9055s for 8192 events => throughput is 8.37E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2641741825130175E-007] fbridge_mode=0 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 1066.0771s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3353s - [COUNTERS] Fortran MEs ( 1 ) : 1061.7418s for 90112 events => throughput is 8.49E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1077.8633s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4153s + [COUNTERS] Fortran MEs ( 1 ) : 1073.4480s for 90112 events => throughput is 8.39E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575548148501E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 122.8376s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4787s - [COUNTERS] CudaCpp MEs ( 2 ) : 122.1623s for 8192 events => throughput is 6.71E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1966s + [COUNTERS] PROGRAM TOTAL : 121.6812s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4851s + [COUNTERS] CudaCpp MEs ( 2 ) : 120.9979s for 8192 events => throughput is 6.77E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1982s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2641741825130207E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 1348.3124s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2937s - [COUNTERS] CudaCpp MEs ( 2 ) : 1343.8245s for 90112 events => throughput is 6.71E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1942s + [COUNTERS] PROGRAM TOTAL : 1343.2729s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3800s + [COUNTERS] CudaCpp MEs ( 2 ) : 1338.6826s for 90112 events => throughput is 6.73E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2103s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.931480e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.808964e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.346009e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.773682e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575548148497E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 59.9516s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4786s - [COUNTERS] CudaCpp MEs ( 2 ) : 59.3740s for 8192 events => throughput is 1.38E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0989s + [COUNTERS] PROGRAM TOTAL : 60.4426s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4815s + [COUNTERS] CudaCpp MEs ( 2 ) : 59.8630s for 8192 events => throughput is 1.37E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0981s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2641741825130209E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 658.6475s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2991s - [COUNTERS] CudaCpp MEs ( 2 ) : 654.2486s for 90112 events => throughput is 1.38E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0998s + [COUNTERS] PROGRAM TOTAL : 669.7982s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3750s + [COUNTERS] CudaCpp MEs ( 2 ) : 665.3245s for 90112 events => throughput is 1.35E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0987s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.625941e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.620968e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.636342e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.623378e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575548148499E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 28.4047s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4789s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.8793s for 8192 events => throughput is 2.94E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0466s + [COUNTERS] PROGRAM TOTAL : 28.8185s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4803s + [COUNTERS] CudaCpp MEs ( 2 ) : 28.2908s for 8192 events => throughput is 2.90E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0473s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2641741825130220E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 311.3726s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3146s - [COUNTERS] CudaCpp MEs ( 2 ) : 307.0111s for 90112 events => throughput is 2.94E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0468s + [COUNTERS] PROGRAM TOTAL : 316.9557s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3978s + [COUNTERS] CudaCpp MEs ( 2 ) : 312.5116s for 90112 events => throughput is 2.88E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0463s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.513912e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.474527e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.526038e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.428059e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575548148499E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 25.0727s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4715s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.5612s for 8192 events => throughput is 3.34E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0400s + [COUNTERS] PROGRAM TOTAL : 25.4499s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4870s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.9217s for 8192 events => throughput is 3.29E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0412s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2641741825130220E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 277.1350s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2964s - [COUNTERS] CudaCpp MEs ( 2 ) : 272.7990s for 90112 events => throughput is 3.30E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0395s + [COUNTERS] PROGRAM TOTAL : 278.1997s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5246s + [COUNTERS] CudaCpp MEs ( 2 ) : 273.6350s for 90112 events => throughput is 3.29E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0401s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.072286e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.008315e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.046896e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.020269e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575548148499E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 24.4560s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4728s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.9381s for 8192 events => throughput is 3.42E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0450s + [COUNTERS] PROGRAM TOTAL : 25.4182s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4828s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.8908s for 8192 events => throughput is 3.29E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0447s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2641741825130220E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 269.6330s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3142s - [COUNTERS] CudaCpp MEs ( 2 ) : 265.2762s for 90112 events => throughput is 3.40E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0426s + [COUNTERS] PROGRAM TOTAL : 279.7174s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3551s + [COUNTERS] CudaCpp MEs ( 2 ) : 275.3169s for 90112 events => throughput is 3.27E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0454s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.656977e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.540558e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.689669e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.595223e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575548148501E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 3.1617s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9774s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1002s for 8192 events => throughput is 7.45E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 1.0841s + [COUNTERS] PROGRAM TOTAL : 3.1566s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9705s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1003s for 8192 events => throughput is 7.45E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0858s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2641741825130222E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 17.8388s - [COUNTERS] Fortran Overhead ( 0 ) : 4.8039s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.9436s for 90112 events => throughput is 7.54E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 1.0913s + [COUNTERS] PROGRAM TOTAL : 17.8702s + [COUNTERS] Fortran Overhead ( 0 ) : 4.8335s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9487s for 90112 events => throughput is 7.54E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0880s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.499559e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.494172e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.234006e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.256504e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.266182e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.274646e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.552602e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.540338e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.275473e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.264214e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.446988e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.441628e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.262534e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.277309e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.240430e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.246710e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index f74c72034a..559911b6ff 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make USEBUILDDIR=1 BACKEND=cuda - +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-22_00:59:23 +DATE: 2024-09-02_08:37:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 97.0139s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4830s - [COUNTERS] Fortran MEs ( 1 ) : 96.5309s for 8192 events => throughput is 8.49E+01 events/s + [COUNTERS] PROGRAM TOTAL : 98.3589s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4847s + [COUNTERS] Fortran MEs ( 1 ) : 97.8742s for 8192 events => throughput is 8.37E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 96.7363s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4803s - [COUNTERS] Fortran MEs ( 1 ) : 96.2560s for 8192 events => throughput is 8.51E+01 events/s + [COUNTERS] PROGRAM TOTAL : 97.9037s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4807s + [COUNTERS] Fortran MEs ( 1 ) : 97.4230s for 8192 events => throughput is 8.41E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2641741825130175E-007] fbridge_mode=0 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 1064.7258s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3425s - [COUNTERS] Fortran MEs ( 1 ) : 1060.3833s for 90112 events => throughput is 8.50E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1108.9567s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5470s + [COUNTERS] Fortran MEs ( 1 ) : 1104.4097s for 90112 events => throughput is 8.16E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -135,10 +135,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2011250641073541E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 112.6859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4791s - [COUNTERS] CudaCpp MEs ( 2 ) : 112.0199s for 8192 events => throughput is 7.31E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1869s + [COUNTERS] PROGRAM TOTAL : 113.8614s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4939s + [COUNTERS] CudaCpp MEs ( 2 ) : 113.1804s for 8192 events => throughput is 7.24E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1871s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -171,10 +171,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2644964998437398E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 1244.7020s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3392s - [COUNTERS] CudaCpp MEs ( 2 ) : 1240.1810s for 90112 events => throughput is 7.27E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1818s + [COUNTERS] PROGRAM TOTAL : 1254.3527s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4844s + [COUNTERS] CudaCpp MEs ( 2 ) : 1249.6808s for 90112 events => throughput is 7.21E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1875s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -187,12 +187,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.855192e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.538641e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.844824e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.488951e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -217,10 +217,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2011248466338516E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 27.2286s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4733s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.7097s for 8192 events => throughput is 3.07E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0457s + [COUNTERS] PROGRAM TOTAL : 28.3507s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4980s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.8049s for 8192 events => throughput is 2.95E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0478s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -253,10 +253,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2644960006557758E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 298.1598s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3168s - [COUNTERS] CudaCpp MEs ( 2 ) : 293.7965s for 90112 events => throughput is 3.07E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0465s + [COUNTERS] PROGRAM TOTAL : 310.5929s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4914s + [COUNTERS] CudaCpp MEs ( 2 ) : 306.0526s for 90112 events => throughput is 2.94E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0488s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -269,12 +269,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.511919e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.372253e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.497649e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.379357e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -299,10 +299,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2011248461798598E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 14.5108s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4752s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.0129s for 8192 events => throughput is 5.85E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0227s + [COUNTERS] PROGRAM TOTAL : 14.8725s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4938s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.3547s for 8192 events => throughput is 5.71E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0240s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -335,10 +335,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2644951222621394E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 158.5681s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3155s - [COUNTERS] CudaCpp MEs ( 2 ) : 154.2287s for 90112 events => throughput is 5.84E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0239s + [COUNTERS] PROGRAM TOTAL : 162.3269s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4878s + [COUNTERS] CudaCpp MEs ( 2 ) : 157.8145s for 90112 events => throughput is 5.71E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0246s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -351,12 +351,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.099740e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.811808e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.119533e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.750586e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -381,10 +381,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2011248461798598E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 12.8834s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4713s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.3915s for 8192 events => throughput is 6.61E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0207s + [COUNTERS] PROGRAM TOTAL : 13.3225s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4965s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.8043s for 8192 events => throughput is 6.40E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0217s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -417,10 +417,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2644951222621394E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 139.2471s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2922s - [COUNTERS] CudaCpp MEs ( 2 ) : 134.9348s for 90112 events => throughput is 6.68E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0201s + [COUNTERS] PROGRAM TOTAL : 146.7771s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4866s + [COUNTERS] CudaCpp MEs ( 2 ) : 142.2688s for 90112 events => throughput is 6.33E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0217s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -433,12 +433,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.081127e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.705366e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.069440e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.670587e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -463,10 +463,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2011251360912330E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 12.3768s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4770s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.8774s for 8192 events => throughput is 6.90E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0225s + [COUNTERS] PROGRAM TOTAL : 13.4800s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4932s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.9621s for 8192 events => throughput is 6.32E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0247s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -499,10 +499,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2644957106171463E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 136.4090s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3035s - [COUNTERS] CudaCpp MEs ( 2 ) : 132.0816s for 90112 events => throughput is 6.82E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0239s + [COUNTERS] PROGRAM TOTAL : 145.7072s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4846s + [COUNTERS] CudaCpp MEs ( 2 ) : 141.1985s for 90112 events => throughput is 6.38E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0241s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -515,12 +515,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.281845e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.841319e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.373062e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.788003e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -544,10 +544,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2011257191623754E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 2.0557s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9634s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5338s for 8192 events => throughput is 1.53E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.5585s + [COUNTERS] PROGRAM TOTAL : 2.0804s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9858s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5414s for 8192 events => throughput is 1.51E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5532s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -579,10 +579,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2644969729873264E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 11.1748s - [COUNTERS] Fortran Overhead ( 0 ) : 4.7805s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.8427s for 90112 events => throughput is 1.54E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.5517s + [COUNTERS] PROGRAM TOTAL : 11.3529s + [COUNTERS] Fortran Overhead ( 0 ) : 4.9667s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.8330s for 90112 events => throughput is 1.54E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5531s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -595,42 +595,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.541524e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.531202e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.549292e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.528697e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.099302e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.145335e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.212509e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.156492e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.136958e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.149709e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.131463e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.142704e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.113807e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.153881e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.956391e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.001184e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index cd2b108cad..2b83738bd0 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -4,11 +4,11 @@ make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-22_02:01:57 +DATE: 2024-09-02_09:42:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 97.1231s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4738s - [COUNTERS] Fortran MEs ( 1 ) : 96.6493s for 8192 events => throughput is 8.48E+01 events/s + [COUNTERS] PROGRAM TOTAL : 100.8249s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4958s + [COUNTERS] Fortran MEs ( 1 ) : 100.3291s for 8192 events => throughput is 8.17E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 97.0792s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4770s - [COUNTERS] Fortran MEs ( 1 ) : 96.6022s for 8192 events => throughput is 8.48E+01 events/s + [COUNTERS] PROGRAM TOTAL : 100.7299s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5005s + [COUNTERS] Fortran MEs ( 1 ) : 100.2294s for 8192 events => throughput is 8.17E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2641741825130175E-007] fbridge_mode=0 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 1068.1865s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3396s - [COUNTERS] Fortran MEs ( 1 ) : 1063.8469s for 90112 events => throughput is 8.47E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1108.8237s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5214s + [COUNTERS] Fortran MEs ( 1 ) : 1104.3024s for 90112 events => throughput is 8.16E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575613215040E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 122.0617s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4956s - [COUNTERS] CudaCpp MEs ( 2 ) : 121.3618s for 8192 events => throughput is 6.75E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2043s + [COUNTERS] PROGRAM TOTAL : 128.8019s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4954s + [COUNTERS] CudaCpp MEs ( 2 ) : 128.0916s for 8192 events => throughput is 6.40E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2149s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2641741947481977E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 1300.3896s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3107s - [COUNTERS] CudaCpp MEs ( 2 ) : 1295.8740s for 90112 events => throughput is 6.95E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2049s + [COUNTERS] PROGRAM TOTAL : 1433.7153s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4970s + [COUNTERS] CudaCpp MEs ( 2 ) : 1429.0044s for 90112 events => throughput is 6.31E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2140s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.809869e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.483669e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.873331e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.450709e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575624556593E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 61.0754s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4769s - [COUNTERS] CudaCpp MEs ( 2 ) : 60.4998s for 8192 events => throughput is 1.35E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0988s + [COUNTERS] PROGRAM TOTAL : 64.0631s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4951s + [COUNTERS] CudaCpp MEs ( 2 ) : 63.4631s for 8192 events => throughput is 1.29E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1049s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2641741950090396E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 673.9577s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3272s - [COUNTERS] CudaCpp MEs ( 2 ) : 669.5308s for 90112 events => throughput is 1.35E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0997s + [COUNTERS] PROGRAM TOTAL : 691.6461s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4363s + [COUNTERS] CudaCpp MEs ( 2 ) : 687.1049s for 90112 events => throughput is 1.31E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1049s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.597786e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.547673e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.593442e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.575361e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575626927521E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 27.1778s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4735s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.6599s for 8192 events => throughput is 3.07E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0444s + [COUNTERS] PROGRAM TOTAL : 28.1802s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4830s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.6511s for 8192 events => throughput is 2.96E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0461s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2641741946798811E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 296.7376s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3098s - [COUNTERS] CudaCpp MEs ( 2 ) : 292.3842s for 90112 events => throughput is 3.08E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0436s + [COUNTERS] PROGRAM TOTAL : 312.1970s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4214s + [COUNTERS] CudaCpp MEs ( 2 ) : 307.7313s for 90112 events => throughput is 2.93E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0444s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.679158e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.529434e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.695318e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.534956e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575626927521E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 23.7526s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4784s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.2363s for 8192 events => throughput is 3.53E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0379s + [COUNTERS] PROGRAM TOTAL : 25.0518s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4954s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.5169s for 8192 events => throughput is 3.34E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0395s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2641741946798811E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 261.7863s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3209s - [COUNTERS] CudaCpp MEs ( 2 ) : 257.4280s for 90112 events => throughput is 3.50E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0375s + [COUNTERS] PROGRAM TOTAL : 274.0316s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4537s + [COUNTERS] CudaCpp MEs ( 2 ) : 269.5401s for 90112 events => throughput is 3.34E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0378s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.307001e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.093052e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.317389e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.105741e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575626927521E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 24.7488s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4758s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.2293s for 8192 events => throughput is 3.38E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0436s + [COUNTERS] PROGRAM TOTAL : 26.5352s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4935s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.9943s for 8192 events => throughput is 3.15E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0474s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2641741946798811E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 268.0088s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3419s - [COUNTERS] CudaCpp MEs ( 2 ) : 263.6236s for 90112 events => throughput is 3.42E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0433s + [COUNTERS] PROGRAM TOTAL : 288.1592s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4946s + [COUNTERS] CudaCpp MEs ( 2 ) : 283.6177s for 90112 events => throughput is 3.18E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0469s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.743317e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.498867e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.691916e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.594525e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.201e-06 [1.2009575531257951E-006] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 2.7248s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9716s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8776s for 8192 events => throughput is 9.33E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.8755s + [COUNTERS] PROGRAM TOTAL : 2.7488s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9990s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8781s for 8192 events => throughput is 9.33E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8717s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.264e-07 [2.2641741822510739E-007] fbridge_mode=1 [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 15.1519s - [COUNTERS] Fortran Overhead ( 0 ) : 4.7832s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.4907s for 90112 events => throughput is 9.49E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.8780s + [COUNTERS] PROGRAM TOTAL : 15.2656s + [COUNTERS] Fortran Overhead ( 0 ) : 4.9039s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4902s for 90112 events => throughput is 9.50E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8715s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.438154e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.413169e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.080719e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.072902e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.105596e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.107520e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.153192e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.152995e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.110229e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.106323e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.110080e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.103746e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.109639e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.102767e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.675260e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.685267e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 020a3ed712..5909437ffc 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,22 +1,22 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu + make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-21_23:36:33 +DATE: 2024-09-02_07:14:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.4726s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4036s - [COUNTERS] Fortran MEs ( 1 ) : 0.0690s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4764s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4060s + [COUNTERS] Fortran MEs ( 1 ) : 0.0704s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.4034s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3336s - [COUNTERS] Fortran MEs ( 1 ) : 0.0699s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4005s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3294s + [COUNTERS] Fortran MEs ( 1 ) : 0.0711s for 8192 events => throughput is 1.15E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182382983293388] fbridge_mode=0 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.2186s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4557s - [COUNTERS] Fortran MEs ( 1 ) : 0.7629s for 90112 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2516s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4776s + [COUNTERS] Fortran MEs ( 1 ) : 0.7740s for 90112 events => throughput is 1.16E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385520600993] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.4086s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3334s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0744s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4135s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3362s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0765s for 8192 events => throughput is 1.07E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182382983293388] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.3251s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4975s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8266s for 90112 events => throughput is 1.09E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [COUNTERS] PROGRAM TOTAL : 2.3617s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5265s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8345s for 90112 events => throughput is 1.08E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.102431e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.047847e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.102848e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.047146e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385520601043] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3699s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3281s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0412s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3824s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3397s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0421s for 8192 events => throughput is 1.94E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182382983293380] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.9467s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4915s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4546s for 90112 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9947s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5274s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4667s for 90112 events => throughput is 1.93E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.005465e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.951130e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.989351e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.939973e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385520601038] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3542s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3300s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0235s for 8192 events => throughput is 3.48E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.3596s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3349s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0241s for 8192 events => throughput is 3.40E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182382983293385] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.7567s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4944s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2617s for 90112 events => throughput is 3.44E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7877s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5214s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2657s for 90112 events => throughput is 3.39E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.353079e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.368351e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.328584e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.359295e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385520601038] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3526s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3309s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0212s for 8192 events => throughput is 3.87E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.3561s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3342s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.84E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182382983293385] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.7299s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4921s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2372s for 90112 events => throughput is 3.80E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7717s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5312s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2400s for 90112 events => throughput is 3.75E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.674323e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.727346e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.785001e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.654587e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385520601038] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3677s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3347s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3689s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3346s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0336s for 8192 events => throughput is 2.44E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182382983293385] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.8509s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5019s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3483s for 90112 events => throughput is 2.59E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8979s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5359s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3614s for 90112 events => throughput is 2.49E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.506958e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.506558e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.508939e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.444740e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385520601049] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.7681s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7638s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7705s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7662s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.71E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182382983293385] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.9383s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9265s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 90112 events => throughput is 8.47E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9624s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9506s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 90112 events => throughput is 8.51E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.077452e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.030039e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.475646e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.498510e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.336316e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.346135e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.176205e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.208610e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.337529e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.346446e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.372428e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.351501e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.338223e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.350079e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.653233e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.647846e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 81fbed4576..7681a43fcc 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -4,8 +4,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-21_23:37:01 +DATE: 2024-09-02_07:14:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.4687s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4000s - [COUNTERS] Fortran MEs ( 1 ) : 0.0687s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4752s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4052s + [COUNTERS] Fortran MEs ( 1 ) : 0.0700s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3970s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3265s - [COUNTERS] Fortran MEs ( 1 ) : 0.0706s for 8192 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4005s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3300s + [COUNTERS] Fortran MEs ( 1 ) : 0.0705s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182382983293388] fbridge_mode=0 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.2308s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4622s - [COUNTERS] Fortran MEs ( 1 ) : 0.7687s for 90112 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2559s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4804s + [COUNTERS] Fortran MEs ( 1 ) : 0.7755s for 90112 events => throughput is 1.16E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343387711996092] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.4011s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3302s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0703s for 8192 events => throughput is 1.17E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.4065s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3335s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0723s for 8192 events => throughput is 1.13E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182383177444153] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.2786s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5011s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7768s for 90112 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3139s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5293s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7840s for 90112 events => throughput is 1.15E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.182628e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.144617e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.135680e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.152269e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343383490650730] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3554s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3288s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0260s for 8192 events => throughput is 3.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3584s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3319s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0260s for 8192 events => throughput is 3.15E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182379928139489] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.7825s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5002s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2818s for 90112 events => throughput is 3.20E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8149s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5264s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2880s for 90112 events => throughput is 3.13E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.086516e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.005069e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.150660e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.006782e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343382982076725] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3464s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3332s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0127s for 8192 events => throughput is 6.43E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3632s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3490s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0138s for 8192 events => throughput is 5.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182379586387554] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.6359s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4958s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1396s for 90112 events => throughput is 6.46E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.7147s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5632s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1511s for 90112 events => throughput is 5.96E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.062674e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.215848e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.222354e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.190438e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343382982076725] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3437s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3306s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0126s for 8192 events => throughput is 6.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3458s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3334s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0120s for 8192 events => throughput is 6.85E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182379586387554] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.6264s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4967s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1293s for 90112 events => throughput is 6.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6562s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5235s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1324s for 90112 events => throughput is 6.81E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.684833e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.618737e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.821989e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.707552e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343386589929475] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3490s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3315s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0170s for 8192 events => throughput is 4.83E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3500s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3324s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.80E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182382325497387] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.6876s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5098s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1772s for 90112 events => throughput is 5.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7111s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5292s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1814s for 90112 events => throughput is 4.97E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.028392e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.952119e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.834126e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.787120e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343391019497171] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.7679s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7639s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.70E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7743s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7705s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.83E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182386711435958] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.9377s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9277s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 90112 events => throughput is 9.95E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9653s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9551s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 90112 events => throughput is 9.69E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.313904e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.122207e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.406775e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.428061e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.153018e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.203636e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.636283e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.653874e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.128642e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.231865e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.757313e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.767629e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.740120e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.821899e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.187604e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.239707e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 74ef61bb65..22704e3e7a 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -3,19 +3,19 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppnone - +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-21_23:37:29 +DATE: 2024-09-02_07:14:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.4711s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4021s - [COUNTERS] Fortran MEs ( 1 ) : 0.0690s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4903s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4187s + [COUNTERS] Fortran MEs ( 1 ) : 0.0716s for 8192 events => throughput is 1.14E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.4011s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3314s - [COUNTERS] Fortran MEs ( 1 ) : 0.0697s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4016s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3303s + [COUNTERS] Fortran MEs ( 1 ) : 0.0713s for 8192 events => throughput is 1.15E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182382983293388] fbridge_mode=0 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.2267s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4591s - [COUNTERS] Fortran MEs ( 1 ) : 0.7675s for 90112 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2740s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4925s + [COUNTERS] Fortran MEs ( 1 ) : 0.7816s for 90112 events => throughput is 1.15E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385506612239] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.4049s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3282s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0759s for 8192 events => throughput is 1.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4109s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3350s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182382982924081] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.3238s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5018s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8213s for 90112 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3489s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5147s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8334s for 90112 events => throughput is 1.08E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.101776e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.093280e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.101637e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.058279e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385506612239] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3737s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3322s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3819s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3400s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182382982924075] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.9560s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5070s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4484s for 90112 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9884s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5305s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4573s for 90112 events => throughput is 1.97E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.989014e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.933401e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.964705e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.983497e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385527282038] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3541s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3303s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0232s for 8192 events => throughput is 3.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3703s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3442s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s for 8192 events => throughput is 3.22E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182382979024772] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.7602s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4992s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2604s for 90112 events => throughput is 3.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7997s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5351s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2641s for 90112 events => throughput is 3.41E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.397345e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.395360e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.423227e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.412066e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385527282038] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3511s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3298s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0208s for 8192 events => throughput is 3.94E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.3571s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3348s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0217s for 8192 events => throughput is 3.78E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182382979024772] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.7394s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5095s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2294s for 90112 events => throughput is 3.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7596s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5252s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2338s for 90112 events => throughput is 3.85E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.800485e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.800197e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.827388e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.822704e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385527282038] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3626s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3297s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0322s for 8192 events => throughput is 2.54E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.3712s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3363s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0343s for 8192 events => throughput is 2.39E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182382979024772] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.8592s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4977s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3608s for 90112 events => throughput is 2.50E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 1.9135s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5362s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3766s for 90112 events => throughput is 2.39E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.391597e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.415462e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.393779e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.367663e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2734 [0.27343385529180364] fbridge_mode=1 [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.7639s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7597s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.68E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7720s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7677s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2118 [0.21182382978588427] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.9462s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9343s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 90112 events => throughput is 8.49E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9744s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9624s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 90112 events => throughput is 8.39E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.943095e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.042449e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.489937e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.516487e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.342569e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.381346e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.224982e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.139906e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.317376e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.358960e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.362791e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.309991e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.326289e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.360701e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.657342e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.653745e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 537d9ab035..d5bb978468 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,13 +1,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=cuda - +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-22_03:23:40 +DATE: 2024-09-02_11:07:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.9347s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8882s - [COUNTERS] Fortran MEs ( 1 ) : 0.0465s for 8192 events => throughput is 1.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9124s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8654s + [COUNTERS] Fortran MEs ( 1 ) : 0.0470s for 8192 events => throughput is 1.74E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4335s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3843s - [COUNTERS] Fortran MEs ( 1 ) : 0.0493s for 8192 events => throughput is 1.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4132s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3664s + [COUNTERS] Fortran MEs ( 1 ) : 0.0468s for 8192 events => throughput is 1.75E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0342240372187286] fbridge_mode=0 [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.8992s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3586s - [COUNTERS] Fortran MEs ( 1 ) : 0.5406s for 90112 events => throughput is 1.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8115s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2952s + [COUNTERS] Fortran MEs ( 1 ) : 0.5163s for 90112 events => throughput is 1.75E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755170] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4145s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3638s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0502s for 8192 events => throughput is 1.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4167s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3663s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0499s for 8192 events => throughput is 1.64E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0342240372187286] fbridge_mode=1 [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.8095s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2687s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5403s for 90112 events => throughput is 1.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8401s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2881s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5515s for 90112 events => throughput is 1.63E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.704082e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.672440e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.693622e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.675836e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.3915s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3639s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 8192 events => throughput is 3.02E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4019s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3730s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0284s for 8192 events => throughput is 2.88E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0342240372187286] fbridge_mode=1 [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.5668s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2752s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2912s for 90112 events => throughput is 3.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5909s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2912s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2992s for 90112 events => throughput is 3.01E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.077969e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.069895e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.093492e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.024323e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.3917s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3748s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0165s for 8192 events => throughput is 4.96E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.3853s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3685s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0164s for 8192 events => throughput is 5.00E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0342240372187521] fbridge_mode=1 [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.4548s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2746s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1798s for 90112 events => throughput is 5.01E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.4747s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2934s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1808s for 90112 events => throughput is 4.98E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.934832e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.848671e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.004753e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.970653e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.3805s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3651s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0150s for 8192 events => throughput is 5.47E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3835s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3677s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0153s for 8192 events => throughput is 5.35E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0342240372187521] fbridge_mode=1 [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.4376s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2744s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1628s for 90112 events => throughput is 5.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5035s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3279s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1752s for 90112 events => throughput is 5.14E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.562123e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.392257e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.438518e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.482164e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755179] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.3886s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3663s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 8192 events => throughput is 3.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3915s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3681s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.59E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0342240372187290] fbridge_mode=1 [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.5255s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2824s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2426s for 90112 events => throughput is 3.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5398s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2947s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2446s for 90112 events => throughput is 3.68E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.508019e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.476730e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.599772e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.588328e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755192] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.8032s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7995s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.84E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8210s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8170s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.57E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0342240372187299] fbridge_mode=1 [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.7264s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7162s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0094s for 90112 events => throughput is 9.55E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.7334s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7230s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0096s for 90112 events => throughput is 9.39E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.095997e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.870351e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.504453e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.356572e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.855286e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.807956e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.278079e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.149618e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.878989e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.828479e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.591936e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.485968e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.889184e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.818735e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.521297e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.484450e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index 042dc1d8e8..e697772733 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,16 +1,16 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - - make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-22_03:24:06 +DATE: 2024-09-02_11:07:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.9477s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8990s - [COUNTERS] Fortran MEs ( 1 ) : 0.0486s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9127s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8660s + [COUNTERS] Fortran MEs ( 1 ) : 0.0467s for 8192 events => throughput is 1.76E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4149s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3680s - [COUNTERS] Fortran MEs ( 1 ) : 0.0469s for 8192 events => throughput is 1.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4136s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3670s + [COUNTERS] Fortran MEs ( 1 ) : 0.0466s for 8192 events => throughput is 1.76E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0342240372187286] fbridge_mode=0 [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.8101s + [COUNTERS] PROGRAM TOTAL : 1.8083s [COUNTERS] Fortran Overhead ( 0 ) : 1.2948s - [COUNTERS] Fortran MEs ( 1 ) : 0.5153s for 90112 events => throughput is 1.75E+05 events/s + [COUNTERS] Fortran MEs ( 1 ) : 0.5135s for 90112 events => throughput is 1.75E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160406825242951] fbridge_mode=1 [UNWEIGHT] Wrote 1653 events (found 1658 events) - [COUNTERS] PROGRAM TOTAL : 0.4156s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3689s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0463s for 8192 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4175s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3702s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0469s for 8192 events => throughput is 1.75E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 1aeb1a3188..0da4f300a0 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -4,17 +4,17 @@ make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 - +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-22_03:24:12 +DATE: 2024-09-02_11:07:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.8970s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8506s - [COUNTERS] Fortran MEs ( 1 ) : 0.0464s for 8192 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9234s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8764s + [COUNTERS] Fortran MEs ( 1 ) : 0.0470s for 8192 events => throughput is 1.74E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4114s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3656s - [COUNTERS] Fortran MEs ( 1 ) : 0.0458s for 8192 events => throughput is 1.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4146s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3683s + [COUNTERS] Fortran MEs ( 1 ) : 0.0463s for 8192 events => throughput is 1.77E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0342240372187286] fbridge_mode=0 [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.7806s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2737s - [COUNTERS] Fortran MEs ( 1 ) : 0.5069s for 90112 events => throughput is 1.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8031s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2900s + [COUNTERS] Fortran MEs ( 1 ) : 0.5131s for 90112 events => throughput is 1.76E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -135,9 +135,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081964453331] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4129s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3628s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0496s for 8192 events => throughput is 1.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4191s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3685s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0502s for 8192 events => throughput is 1.63E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -171,9 +171,9 @@ DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0342240835006229] fbridge_mode=1 [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.8124s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2700s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5419s for 90112 events => throughput is 1.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8344s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2853s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5487s for 90112 events => throughput is 1.64E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -189,13 +189,13 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.537103e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.553823e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.568161e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.574287e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -220,9 +220,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081964453336] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.3896s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3621s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 8192 events => throughput is 3.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3944s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3670s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.03E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -256,9 +256,9 @@ DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0342240835006233] fbridge_mode=1 [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.5854s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2892s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2957s for 90112 events => throughput is 3.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5971s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2953s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3013s for 90112 events => throughput is 2.99E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -274,13 +274,13 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.861099e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.829435e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.919059e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.886586e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -305,10 +305,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.3796s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3626s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0166s for 8192 events => throughput is 4.94E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3863s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3690s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0168s for 8192 events => throughput is 4.86E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -341,9 +341,9 @@ DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0342240828564768] fbridge_mode=1 [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.4551s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2743s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1804s for 90112 events => throughput is 5.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4898s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3038s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1856s for 90112 events => throughput is 4.86E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -359,13 +359,13 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.594828e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.691631e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.842693e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.845209e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -390,10 +390,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.3861s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3700s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0157s for 8192 events => throughput is 5.21E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.3957s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3798s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0154s for 8192 events => throughput is 5.33E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -426,9 +426,9 @@ DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0342240828564768] fbridge_mode=1 [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.4459s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2789s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1666s for 90112 events => throughput is 5.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4583s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2881s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1698s for 90112 events => throughput is 5.31E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -444,13 +444,13 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.190709e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.226881e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.340290e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.096712e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -475,10 +475,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081962970020] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.3893s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3665s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0224s for 8192 events => throughput is 3.66E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.4060s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3808s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 8192 events => throughput is 3.32E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -511,9 +511,9 @@ DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0342240828564875] fbridge_mode=1 [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.5317s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2772s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2541s for 90112 events => throughput is 3.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5452s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2892s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2555s for 90112 events => throughput is 3.53E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -529,13 +529,13 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.336305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.160841e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.211534e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.259803e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -559,9 +559,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081483021330] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.8035s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7998s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.88E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8155s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8115s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.63E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -594,9 +594,9 @@ DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0342240375655276] fbridge_mode=1 [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.7165s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7064s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 90112 events => throughput is 9.69E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.7654s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7547s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0099s for 90112 events => throughput is 9.10E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -610,42 +610,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.199365e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.857446e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.502648e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.233421e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.857624e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.825838e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.186054e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.145470e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.876331e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.808566e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.585645e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.481866e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.849572e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.802589e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.508879e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.508007e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index ca501dcd31..e3380f08bb 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,30 +1,30 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx + make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-22_03:27:08 +DATE: 2024-09-02_11:10:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 2.6268s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3209s - [COUNTERS] Fortran MEs ( 1 ) : 2.3059s for 8192 events => throughput is 3.55E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5826s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3167s + [COUNTERS] Fortran MEs ( 1 ) : 2.2659s for 8192 events => throughput is 3.62E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.5565s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3167s - [COUNTERS] Fortran MEs ( 1 ) : 2.2398s for 8192 events => throughput is 3.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5887s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3201s + [COUNTERS] Fortran MEs ( 1 ) : 2.2686s for 8192 events => throughput is 3.61E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161305627123644E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 26.5314s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7959s - [COUNTERS] Fortran MEs ( 1 ) : 24.7355s for 90112 events => throughput is 3.64E+03 events/s + [COUNTERS] PROGRAM TOTAL : 26.7579s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8126s + [COUNTERS] Fortran MEs ( 1 ) : 24.9453s for 90112 events => throughput is 3.61E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679976028207996E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.7394s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3217s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4123s for 8192 events => throughput is 3.40E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0053s + [COUNTERS] PROGRAM TOTAL : 2.7584s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3213s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4318s for 8192 events => throughput is 3.37E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0054s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161305627123675E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 28.5116s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8255s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.6809s for 90112 events => throughput is 3.38E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s + [COUNTERS] PROGRAM TOTAL : 28.6425s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8348s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.8024s for 90112 events => throughput is 3.36E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0053s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.560225e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.524694e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.560475e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.516868e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679976028208017E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.5913s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3185s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2700s for 8192 events => throughput is 6.45E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s + [COUNTERS] PROGRAM TOTAL : 1.6021s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3244s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2748s for 8192 events => throughput is 6.43E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161305627123633E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 15.7013s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7986s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.9000s for 90112 events => throughput is 6.48E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s + [COUNTERS] PROGRAM TOTAL : 16.0101s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8332s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.1740s for 90112 events => throughput is 6.36E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.736764e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.665998e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.703656e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.545421e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679976028207985E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8760s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3204s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5540s for 8192 events => throughput is 1.48E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [COUNTERS] PROGRAM TOTAL : 0.8914s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3263s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5634s for 8192 events => throughput is 1.45E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161305627123675E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 7.9712s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8047s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.1647s for 90112 events => throughput is 1.46E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + [COUNTERS] PROGRAM TOTAL : 8.0725s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8311s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.2398s for 90112 events => throughput is 1.44E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.506854e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.483755e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.513652e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.480557e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679976028207985E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8210s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3243s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4952s for 8192 events => throughput is 1.65E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + [COUNTERS] PROGRAM TOTAL : 0.8286s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3250s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5021s for 8192 events => throughput is 1.63E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161305627123675E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 7.2734s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8012s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4708s for 90112 events => throughput is 1.65E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 7.3795s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8232s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.5548s for 90112 events => throughput is 1.62E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.648208e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.673020e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.706590e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.683149e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679976028207985E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.9736s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3185s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6532s for 8192 events => throughput is 1.25E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9729s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3250s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6460s for 8192 events => throughput is 1.27E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0019s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161305627123675E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 8.9590s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8109s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.1462s for 90112 events => throughput is 1.26E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0019s + [COUNTERS] PROGRAM TOTAL : 9.0165s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8262s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.1883s for 90112 events => throughput is 1.25E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.277931e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.273578e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.285067e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.277348e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679976028207985E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.7965s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7572s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0195s for 8192 events => throughput is 4.19E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s + [COUNTERS] PROGRAM TOTAL : 0.7984s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7585s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0200s for 8192 events => throughput is 4.10E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0200s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161305627123665E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.4439s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2302s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1938s for 90112 events => throughput is 4.65E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0199s + [COUNTERS] PROGRAM TOTAL : 2.4531s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2399s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1934s for 90112 events => throughput is 4.66E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.242427e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.222296e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.554723e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.524950e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.857180e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.852922e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.230140e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.236218e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.854667e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.863303e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.205345e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.201958e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.855473e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.851908e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.682150e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.683062e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 5420c542eb..211fa0151b 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -3,8 +3,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-22_03:29:30 +DATE: 2024-09-02_11:12:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 2.5449s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3117s - [COUNTERS] Fortran MEs ( 1 ) : 2.2332s for 8192 events => throughput is 3.67E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6070s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3193s + [COUNTERS] Fortran MEs ( 1 ) : 2.2878s for 8192 events => throughput is 3.58E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.5504s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3168s - [COUNTERS] Fortran MEs ( 1 ) : 2.2336s for 8192 events => throughput is 3.67E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6125s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3260s + [COUNTERS] Fortran MEs ( 1 ) : 2.2865s for 8192 events => throughput is 3.58E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161305627123644E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 26.4545s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7964s - [COUNTERS] Fortran MEs ( 1 ) : 24.6581s for 90112 events => throughput is 3.65E+03 events/s + [COUNTERS] PROGRAM TOTAL : 26.7407s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8076s + [COUNTERS] Fortran MEs ( 1 ) : 24.9330s for 90112 events => throughput is 3.61E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6680052401606547E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.6995s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3207s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3738s for 8192 events => throughput is 3.45E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s + [COUNTERS] PROGRAM TOTAL : 2.7184s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3253s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3879s for 8192 events => throughput is 3.43E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161357558617576E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 27.8592s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7949s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.0596s for 90112 events => throughput is 3.46E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s + [COUNTERS] PROGRAM TOTAL : 28.2547s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8365s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.4131s for 90112 events => throughput is 3.41E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.591819e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.508792e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.592501e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.516690e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6680037387484579E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.9731s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3162s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6552s for 8192 events => throughput is 1.25E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + [COUNTERS] PROGRAM TOTAL : 1.0035s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3334s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6685s for 8192 events => throughput is 1.23E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161341270162819E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 8.9875s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7891s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.1967s for 90112 events => throughput is 1.25E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [COUNTERS] PROGRAM TOTAL : 9.2668s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8432s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.4220s for 90112 events => throughput is 1.21E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.273845e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.238352e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.282434e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.217968e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6680038082583914E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.6009s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3179s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2820s for 8192 events => throughput is 2.90E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s + [COUNTERS] PROGRAM TOTAL : 0.6368s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3355s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3002s for 8192 events => throughput is 2.73E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161368606547695E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 4.9168s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7970s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.1187s for 90112 events => throughput is 2.89E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s + [COUNTERS] PROGRAM TOTAL : 5.1037s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8573s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.2453s for 90112 events => throughput is 2.78E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.971452e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.898486e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.958665e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.915208e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6680038082583914E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.5744s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3150s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2585s for 8192 events => throughput is 3.17E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [COUNTERS] PROGRAM TOTAL : 0.5834s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3223s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2600s for 8192 events => throughput is 3.15E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161368606547695E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 4.6744s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7952s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8781s for 90112 events => throughput is 3.13E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.7219s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8280s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8930s for 90112 events => throughput is 3.11E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.291495e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.224802e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.311470e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.245567e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6680052283166904E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.6451s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3196s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3242s for 8192 events => throughput is 2.53E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6538s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3229s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3297s for 8192 events => throughput is 2.48E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161383186590445E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 5.3588s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7925s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5652s for 90112 events => throughput is 2.53E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 5.4776s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8422s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6341s for 90112 events => throughput is 2.48E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.561524e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.465617e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.564740e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.529929e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6680077090677233E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.7948s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7580s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0196s for 8192 events => throughput is 4.19E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0172s + [COUNTERS] PROGRAM TOTAL : 0.7991s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7619s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.14E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0174s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161404612259676E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.4124s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2210s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1742s for 90112 events => throughput is 5.17E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0172s + [COUNTERS] PROGRAM TOTAL : 2.4565s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2645s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1747s for 90112 events => throughput is 5.16E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0174s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.251422e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.209228e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.470233e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.460189e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.297381e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.296236e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.335894e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.326771e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.309724e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.295726e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.336754e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.322025e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.306739e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.292225e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.655313e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.655822e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index ba367faa12..bfd919434f 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx - make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone + +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-22_03:31:28 +DATE: 2024-09-02_11:14:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 2.5441s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3094s - [COUNTERS] Fortran MEs ( 1 ) : 2.2348s for 8192 events => throughput is 3.67E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6775s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3283s + [COUNTERS] Fortran MEs ( 1 ) : 2.3491s for 8192 events => throughput is 3.49E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.5333s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3136s - [COUNTERS] Fortran MEs ( 1 ) : 2.2197s for 8192 events => throughput is 3.69E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6836s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3360s + [COUNTERS] Fortran MEs ( 1 ) : 2.3476s for 8192 events => throughput is 3.49E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161305627123644E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 26.3540s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7868s - [COUNTERS] Fortran MEs ( 1 ) : 24.5673s for 90112 events => throughput is 3.67E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.6096s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8766s + [COUNTERS] Fortran MEs ( 1 ) : 25.7330s for 90112 events => throughput is 3.50E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679974424193742E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.7700s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3202s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4448s for 8192 events => throughput is 3.35E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s + [COUNTERS] PROGRAM TOTAL : 2.8645s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3335s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.5256s for 8192 events => throughput is 3.24E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0055s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161304067553537E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 28.7451s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8261s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.9138s for 90112 events => throughput is 3.35E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s + [COUNTERS] PROGRAM TOTAL : 29.0052s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8441s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.1557s for 90112 events => throughput is 3.32E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0054s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.369003e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.464819e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.361318e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.464133e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679974345453326E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.5711s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3281s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2400s for 8192 events => throughput is 6.61E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.5765s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3234s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2502s for 8192 events => throughput is 6.55E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161303969775166E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 15.3185s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7940s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.5219s for 90112 events => throughput is 6.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.6221s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8267s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.7926s for 90112 events => throughput is 6.53E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.037738e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.901453e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.020726e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.846222e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679974485677619E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8719s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3173s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5530s for 8192 events => throughput is 1.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8933s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3306s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5610s for 8192 events => throughput is 1.46E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161304099640839E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 7.9219s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8203s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.1001s for 90112 events => throughput is 1.48E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [COUNTERS] PROGRAM TOTAL : 7.9940s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8231s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.1692s for 90112 events => throughput is 1.46E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.526224e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.504497e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.532099e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.491789e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679974485677619E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8069s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3190s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4864s for 8192 events => throughput is 1.68E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8199s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3252s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4932s for 8192 events => throughput is 1.66E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161304099640839E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 7.3372s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8007s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.5349s for 90112 events => throughput is 1.63E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.2217s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8152s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4050s for 90112 events => throughput is 1.67E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.753200e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.707424e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.745221e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.727329e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679974485677619E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.9868s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3258s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6591s for 8192 events => throughput is 1.24E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9929s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3219s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6692s for 8192 events => throughput is 1.22E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0019s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161304099640839E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 9.0125s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8113s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.1991s for 90112 events => throughput is 1.25E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s + [COUNTERS] PROGRAM TOTAL : 9.1174s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8280s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.2874s for 90112 events => throughput is 1.24E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.283850e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.256200e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.289918e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.250184e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.668e-07 [7.6679976038108255E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8017s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7621s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.14E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0199s + [COUNTERS] PROGRAM TOTAL : 0.8003s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7603s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0200s for 8192 events => throughput is 4.11E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0201s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.616e-07 [7.6161305624152697E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.4327s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2190s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1938s for 90112 events => throughput is 4.65E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0199s + [COUNTERS] PROGRAM TOTAL : 2.5121s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2973s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1947s for 90112 events => throughput is 4.63E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0200s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.252966e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.202288e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.566943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.538720e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.826739e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.827213e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.206857e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.207516e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.830268e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.827263e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.203053e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.171035e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.830312e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.831697e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.670866e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.676692e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 70934f5a33..3ef5a0426f 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -3,20 +3,20 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-22_03:25:56 +DATE: 2024-09-02_11:09:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.6295s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6211s - [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6420s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6335s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.58E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3742s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3662s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3816s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3729s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.43E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=0 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3817s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2915s - [COUNTERS] Fortran MEs ( 1 ) : 0.0902s for 90112 events => throughput is 9.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4021s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3094s + [COUNTERS] Fortran MEs ( 1 ) : 0.0927s for 90112 events => throughput is 9.72E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3759s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3670s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 8192 events => throughput is 9.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3809s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3719s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.60E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3675s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2784s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0887s for 90112 events => throughput is 1.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3773s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2846s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0923s for 90112 events => throughput is 9.77E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.022296e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.000425e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.016823e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.004154e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3773s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3721s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 8192 events => throughput is 1.70E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3776s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3725s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 8192 events => throughput is 1.77E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3157s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2675s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0479s for 90112 events => throughput is 1.88E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3454s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2951s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0498s for 90112 events => throughput is 1.81E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.952768e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.864873e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.962384e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.965020e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3711s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3679s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.94E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3790s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3757s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.88E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752644073268065] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3055s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2745s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0305s for 90112 events => throughput is 2.95E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.3169s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2855s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0309s for 90112 events => throughput is 2.91E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.119431e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.094157e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.291456e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.245967e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3792s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3759s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3793s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3761s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.92E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752644073268065] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.2976s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2688s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0284s for 90112 events => throughput is 3.17E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3127s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2835s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0288s for 90112 events => throughput is 3.13E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.232114e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.093537e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.515132e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.505628e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3721s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3687s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.88E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3786s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3747s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.45E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752644073268065] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3825s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3447s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0373s for 90112 events => throughput is 2.42E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.3245s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2901s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0339s for 90112 events => throughput is 2.66E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.776132e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.858695e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.951337e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.223033e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590697361620711] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.8051s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8015s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.72E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8115s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8078s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.7119s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7034s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 90112 events => throughput is 1.14E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7282s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7193s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 90112 events => throughput is 1.08E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.431308e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.084025e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.846495e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.467041e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.587887e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.542591e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.941009e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.896702e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.564624e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.496531e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.932049e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.940593e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.607572e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.503147e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.251419e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.228305e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index b728cf01ba..e45c8dd3da 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,8 +1,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x - make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-22_03:26:20 +DATE: 2024-09-02_11:09:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.6316s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6234s - [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 9.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6377s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6294s + [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3771s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3687s - [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3812s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3728s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.74E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=0 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3743s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2848s - [COUNTERS] Fortran MEs ( 1 ) : 0.0896s for 90112 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3990s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3066s + [COUNTERS] Fortran MEs ( 1 ) : 0.0924s for 90112 events => throughput is 9.75E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590691487682503] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3792s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3703s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3843s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3757s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.89E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752638362648882] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3687s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2786s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0898s for 90112 events => throughput is 1.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3847s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2934s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0910s for 90112 events => throughput is 9.90E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.015225e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.025283e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.014615e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.024362e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590691359923711] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3718s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3688s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3754s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3724s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.09E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752638324844145] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.2961s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2660s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0298s for 90112 events => throughput is 3.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3158s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2859s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0296s for 90112 events => throughput is 3.05E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.167979e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.269308e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.382172e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.238619e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590691606590692] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3715s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3694s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.50E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3746s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3725s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.44E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752638419881734] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.2971s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2767s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0201s for 90112 events => throughput is 4.49E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3054s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2851s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0199s for 90112 events => throughput is 4.52E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.033271e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.715578e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.297828e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.352269e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590691606590692] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3766s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3746s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3732s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3712s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.71E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752638419881734] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.2872s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2681s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0188s for 90112 events => throughput is 4.79E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3384s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3179s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0202s for 90112 events => throughput is 4.46E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.272138e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.261579e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.513966e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.564280e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590692025204030] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3700s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3673s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.68E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3887s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3860s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.72E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752638860024578] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.2904s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2680s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0220s for 90112 events => throughput is 4.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3283s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3048s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 90112 events => throughput is 3.90E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.412227e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.404995e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.848494e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.960556e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590692347055715] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.8035s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8000s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.77E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8105s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8069s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.73E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752639119626163] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.7078s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6994s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 90112 events => throughput is 1.16E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7523s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7434s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 90112 events => throughput is 1.08E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.509110e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.178526e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.721348e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.421541e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.896109e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.232026e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.210427e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.092431e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.823420e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.371466e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.187530e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.089703e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.474730e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.938227e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.680141e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.621151e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index c19c807968..13ea6d6457 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -6,8 +6,8 @@ make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-22_03:26:43 +DATE: 2024-09-02_11:10:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.6345s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6262s - [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6471s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6387s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.76E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3766s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3678s - [COUNTERS] Fortran MEs ( 1 ) : 0.0088s for 8192 events => throughput is 9.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3756s + [COUNTERS] Fortran MEs ( 1 ) : 0.0089s for 8192 events => throughput is 9.22E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=0 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3772s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2870s - [COUNTERS] Fortran MEs ( 1 ) : 0.0902s for 90112 events => throughput is 9.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3978s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3060s + [COUNTERS] Fortran MEs ( 1 ) : 0.0918s for 90112 events => throughput is 9.82E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590698182878584] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3757s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3669s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3797s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3710s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.91E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752644886297176] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3661s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2749s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0908s for 90112 events => throughput is 9.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3704s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2791s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0909s for 90112 events => throughput is 9.91E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.924593e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.886423e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.937019e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.874996e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590698182878584] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3724s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3675s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.85E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3769s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3719s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 8192 events => throughput is 1.79E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752644886297176] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3109s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2618s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0486s for 90112 events => throughput is 1.85E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3326s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2834s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0488s for 90112 events => throughput is 1.85E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.895255e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.911267e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.012680e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.968214e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590698277712874] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3741s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3710s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3750s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3718s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.92E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752644968184478] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.2948s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2652s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0292s for 90112 events => throughput is 3.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3191s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2888s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0299s for 90112 events => throughput is 3.02E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.235450e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.241118e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.403824e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.286119e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590698277712874] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3706s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3676s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.13E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3761s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3730s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.16E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752644968184478] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3026s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2739s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0283s for 90112 events => throughput is 3.19E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.3399s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3098s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0297s for 90112 events => throughput is 3.03E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.352796e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.473724e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.508682e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.561982e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590698277712874] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3718s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3683s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.77E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3785s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3749s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752644968184478] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3059s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2738s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0317s for 90112 events => throughput is 2.85E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3246s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2913s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0328s for 90112 events => throughput is 2.75E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.998636e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.874323e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.181761e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.161810e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3059 [0.30590697378458576] fbridge_mode=1 [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.8091s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8058s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.94E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8101s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8064s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30752644069868873] fbridge_mode=1 [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.7170s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7082s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 90112 events => throughput is 1.11E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7220s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7132s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 90112 events => throughput is 1.09E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.284948e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.095622e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.816586e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.536574e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.563395e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.536213e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.965519e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.962484e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.627148e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.497581e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.966795e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.884190e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.604319e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.529353e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.244921e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.228927e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 223cb89e40..ba2780b6c2 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-22_03:24:39 +DATE: 2024-09-02_11:08:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.7783s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7375s - [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7962s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7546s + [COUNTERS] Fortran MEs ( 1 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4046s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3636s - [COUNTERS] Fortran MEs ( 1 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4104s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3684s + [COUNTERS] Fortran MEs ( 1 ) : 0.0420s for 8192 events => throughput is 1.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=0 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.7079s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2586s - [COUNTERS] Fortran MEs ( 1 ) : 0.4493s for 90112 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7386s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2815s + [COUNTERS] Fortran MEs ( 1 ) : 0.4571s for 90112 events => throughput is 1.97E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690956764848757] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4106s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3668s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4134s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3696s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.7289s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2550s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4734s for 90112 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7632s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2798s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4829s for 90112 events => throughput is 1.87E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.902843e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.887422e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.876183e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910992e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3943s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3702s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3962s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3712s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0245s for 8192 events => throughput is 3.34E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.5124s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2465s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2655s for 90112 events => throughput is 3.39E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.5530s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2804s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2721s for 90112 events => throughput is 3.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.349577e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.355488e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.361639e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.318491e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690956764848735] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3846s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3693s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0149s for 8192 events => throughput is 5.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3868s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3712s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.39E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.4238s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2562s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1672s for 90112 events => throughput is 5.39E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4438s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2739s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1694s for 90112 events => throughput is 5.32E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.326838e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.232887e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.346593e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.260759e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690956764848735] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3794s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3653s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0137s for 8192 events => throughput is 5.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3866s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3723s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0138s for 8192 events => throughput is 5.92E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.4133s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2611s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1518s for 90112 events => throughput is 5.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4280s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2729s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1547s for 90112 events => throughput is 5.83E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.665657e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.790244e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.840518e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.804559e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690956764848735] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3892s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3670s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 8192 events => throughput is 3.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3930s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3710s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0216s for 8192 events => throughput is 3.79E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.4886s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2539s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2342s for 90112 events => throughput is 3.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5258s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2802s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2451s for 90112 events => throughput is 3.68E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.589281e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.615689e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.561336e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.658613e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690956764848728] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.7974s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7937s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.82E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8129s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8089s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.60E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411959203413069] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.6943s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6845s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 90112 events => throughput is 9.96E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.7214s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7107s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0098s for 90112 events => throughput is 9.16E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.299319e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.991537e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.562666e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.441995e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.907907e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.877774e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.696731e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.619188e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.935865e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.871906e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.015733e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.013278e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.923062e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.893751e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.715601e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.689301e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index b2e100edc8..bb0fae6bdf 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -2,11 +2,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 + +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -14,12 +14,12 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-22_03:25:04 +DATE: 2024-09-02_11:08:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.7803s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7389s - [COUNTERS] Fortran MEs ( 1 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8084s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7664s + [COUNTERS] Fortran MEs ( 1 ) : 0.0420s for 8192 events => throughput is 1.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,8 +83,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4037s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3616s + [COUNTERS] PROGRAM TOTAL : 0.4065s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3644s [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=0 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.7586s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2978s - [COUNTERS] Fortran MEs ( 1 ) : 0.4608s for 90112 events => throughput is 1.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7405s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2818s + [COUNTERS] Fortran MEs ( 1 ) : 0.4587s for 90112 events => throughput is 1.96E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690951135742296] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4293s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3867s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0423s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4120s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3701s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411953404075810] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.7195s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2677s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4515s for 90112 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7277s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2737s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4536s for 90112 events => throughput is 1.99E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.018305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.018259e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.967614e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.007607e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,8 +214,8 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690947248027847] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3829s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3658s + [COUNTERS] PROGRAM TOTAL : 0.3896s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3724s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0169s for 8192 events => throughput is 4.86E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411949727730686] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.4415s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2583s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1830s for 90112 events => throughput is 4.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4603s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2754s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1846s for 90112 events => throughput is 4.88E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.788292e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.698069e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.716767e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.690804e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -295,8 +295,8 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] Cross section = 44.69 [44.690947419182343] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) [COUNTERS] PROGRAM TOTAL : 0.3758s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3668s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 8192 events => throughput is 9.41E+05 events/s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3665s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 8192 events => throughput is 9.09E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411949260921247] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.3601s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2622s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0976s for 90112 events => throughput is 9.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3662s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2675s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0984s for 90112 events => throughput is 9.16E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.159373e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.017679e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.212730e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.191081e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690947419182343] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3724s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3634s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 8192 events => throughput is 9.44E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3811s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3724s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 8192 events => throughput is 9.80E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411949260921247] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.3449s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2530s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0916s for 90112 events => throughput is 9.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3609s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2686s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0920s for 90112 events => throughput is 9.79E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.712881e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.778277e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.945039e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.668348e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690951463003671] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3793s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3681s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0109s for 8192 events => throughput is 7.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3822s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3702s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0116s for 8192 events => throughput is 7.04E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411953494761157] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.3787s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2531s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1252s for 90112 events => throughput is 7.20E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4124s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2839s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1281s for 90112 events => throughput is 7.03E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.999423e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.876672e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.895597e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.873464e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690956060520207] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.7963s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7928s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.90E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8062s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8025s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411957327369002] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.6921s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6834s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 90112 events => throughput is 1.11E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7214s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7121s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 90112 events => throughput is 1.04E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.399026e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.086629e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.601106e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.446297e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.120477e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.792358e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.431044e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.318678e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.154807e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.741693e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.369838e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.332831e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.738148e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.395625e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.056397e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.005588e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index f71519e4b2..ff3b8bf7c7 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,13 +1,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -20,11 +20,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-22_03:25:30 +DATE: 2024-09-02_11:08:54 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.7988s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7579s - [COUNTERS] Fortran MEs ( 1 ) : 0.0409s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8079s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7642s + [COUNTERS] Fortran MEs ( 1 ) : 0.0437s for 8192 events => throughput is 1.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4045s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3630s - [COUNTERS] Fortran MEs ( 1 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4123s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3693s + [COUNTERS] Fortran MEs ( 1 ) : 0.0430s for 8192 events => throughput is 1.91E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=0 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.7188s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2686s - [COUNTERS] Fortran MEs ( 1 ) : 0.4502s for 90112 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7537s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2906s + [COUNTERS] Fortran MEs ( 1 ) : 0.4631s for 90112 events => throughput is 1.95E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690958008771119] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4129s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3682s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0442s for 8192 events => throughput is 1.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4202s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3749s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0449s for 8192 events => throughput is 1.83E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411960462697984] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.7399s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2583s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4812s for 90112 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7663s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2776s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4883s for 90112 events => throughput is 1.85E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.903043e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.847731e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.868412e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.855305e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690958008771119] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3892s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3649s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0238s for 8192 events => throughput is 3.44E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3990s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3738s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 8192 events => throughput is 3.31E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411960462697984] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.5275s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2648s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2623s for 90112 events => throughput is 3.44E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5614s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2909s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2701s for 90112 events => throughput is 3.34E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.387840e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.371536e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.398750e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.411767e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690958040780203] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3806s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3654s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0148s for 8192 events => throughput is 5.54E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3924s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3767s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.38E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411960501666542] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.4192s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2545s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1642s for 90112 events => throughput is 5.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4425s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2733s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1688s for 90112 events => throughput is 5.34E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.285681e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.286155e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.379658e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.374034e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690958040780203] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3823s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3677s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0141s for 8192 events => throughput is 5.80E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3926s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3780s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.79E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411960501666542] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.4079s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2535s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1539s for 90112 events => throughput is 5.85E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.4350s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2799s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1546s for 90112 events => throughput is 5.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.964392e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.918164e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.009824e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.938830e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690958040780203] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3899s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3681s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3936s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3719s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.88E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411960501666542] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.4849s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2550s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2294s for 90112 events => throughput is 3.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5189s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2824s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2360s for 90112 events => throughput is 3.82E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.850222e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.768467e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.898534e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.824225e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.69 [44.690956743203600] fbridge_mode=1 [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.7956s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7919s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.79E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 0.8164s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8123s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.58E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.41 [44.411959201897950] fbridge_mode=1 [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.6977s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6877s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 90112 events => throughput is 9.74E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.7393s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7287s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0098s for 90112 events => throughput is 9.21E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.155579e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.004903e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.432618e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.420646e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.917316e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.892002e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.497523e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.666542e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.925954e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.876265e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.067763e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.014192e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.932333e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.868624e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.742049e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.693794e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***