Commit 3821d30

Merge remote-tracking branch 'upstream/master'

Jooorgen committed Aug 16, 2023
2 parents d889085 + fe3cdf7
Showing 284 changed files with 9,197 additions and 7,563 deletions.
2 changes: 1 addition & 1 deletion epochX/cudacpp/CODEGEN/MG5aMC_patches/PROD/commit.GIT
@@ -1 +1 @@
-91f37b6aa
+941832b9b
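Note: commit.GIT presumably records the upstream MG5aMC commit against which the PROD patches are maintained; this merge bumps the pinned hash from 91f37b6aa to 941832b9b.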
@@ -35,17 +35,6 @@ index 617f10b93..dbe08b846 100644
+
+cleanall: cleanSource
+ for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done;
diff --git b/epochX/cudacpp/gg_tt.mad/Source/vector.inc a/epochX/cudacpp/gg_tt.mad/Source/vector.inc
index 863eebbc7..92254c0f2 100644
--- b/epochX/cudacpp/gg_tt.mad/Source/vector.inc
+++ a/epochX/cudacpp/gg_tt.mad/Source/vector.inc
@@ -28,4 +28,5 @@ C BECAUSE IT DOES NOT GO THROUGH THE CPP PREPROCESSOR
C (see https://github.com/madgraph5/madgraph4gpu/issues/458).
C
INTEGER VECSIZE_MEMMAX
- PARAMETER (VECSIZE_MEMMAX=16384)
+ PARAMETER (VECSIZE_MEMMAX=16384) ! NB: 16k events per GPU grid is the minimum required to fill a V100 GPU
+c PARAMETER (VECSIZE_MEMMAX=32) ! NB: workaround for out-of-memory on Juwels: 32 is enough for no-CUDA builds (issue #498)
diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile
index 348c283be..74db44d84 100644
--- b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile
@@ -319,10 +308,10 @@ index 57f5f7bb9..bd3c24228 100644

BIASLIBDIR=../../../lib/
diff --git b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py
-index 27cd896a7..c1e54d3cb 100755
+index 8f8df219d..7624b9f55 100755
--- b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py
+++ a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py
-@@ -4164,7 +4164,8 @@ class RunCardLO(RunCard):
+@@ -4187,7 +4187,8 @@ class RunCardLO(RunCard):
self.add_param('mxxmin4pdg',[-1.], system=True)
self.add_param('mxxpart_antipart', [False], system=True)

@@ -18,6 +18,7 @@
#include <cassert>
#include <cmath>
#include <cstring>
+#include <filesystem>
#include <iostream>
#include <memory>
#include <type_traits>
@@ -244,14 +245,21 @@ namespace mg5amcCpu
}
std::cout << "WARNING! Instantiate device Bridge (nevt=" << m_nevt << ", gpublocks=" << m_gpublocks << ", gputhreads=" << m_gputhreads
<< ", gpublocks*gputhreads=" << m_gpublocks * m_gputhreads << ")" << std::endl;
-CPPProcess process( /*verbose=*/false );
m_pmek.reset( new MatrixElementKernelDevice( m_devMomentaC, m_devGs, m_devRndHel, m_devRndCol, m_devMEs, m_devSelHel, m_devSelCol, m_gpublocks, m_gputhreads ) );
#else
std::cout << "WARNING! Instantiate host Bridge (nevt=" << m_nevt << ")" << std::endl;
-CPPProcess process( /*verbose=*/false );
m_pmek.reset( new MatrixElementKernelHost( m_hstMomentaC, m_hstGs, m_hstRndHel, m_hstRndCol, m_hstMEs, m_hstSelHel, m_hstSelCol, m_nevt ) );
#endif // MGONGPUCPP_GPUIMPL
process.initProc( "../../Cards/param_card.dat" );
// Create a process object, read param card and set parameters
// FIXME: the process instance can happily go out of scope because it is only needed to read parameters?
// FIXME: the CPPProcess should really be a singleton? what if fbridgecreate is called from several Fortran threads?
CPPProcess process( /*verbose=*/false );
std::string paramCard = "../../Cards/param_card.dat";
if( !std::filesystem::exists( paramCard ) )
{
paramCard = "../" + paramCard;
}
process.initProc( paramCard );
}

#ifdef MGONGPUCPP_GPUIMPL
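Note: the new param card lookup above simply falls back one directory level when the default relative path does not exist (presumably so that the same code works both from the usual build directory and from one level deeper, as in the madevent layout). A minimal standalone sketch of the same pattern, assuming only the C++17 standard library (resolveParamCard is a hypothetical helper name, not part of the Bridge API):

    #include <filesystem>
    #include <iostream>
    #include <string>

    // Return the param card path, falling back one directory level up if the
    // default relative location does not exist (the pattern used in the new
    // Bridge constructor above).
    std::string resolveParamCard()
    {
      std::string paramCard = "../../Cards/param_card.dat";
      if( !std::filesystem::exists( paramCard ) )
        paramCard = "../" + paramCard; // i.e. ../../../Cards/param_card.dat
      return paramCard;
    }

    int main()
    {
      std::cout << "Using param card: " << resolveParamCard() << std::endl;
      return 0;
    }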
@@ -278,10 +278,10 @@ main( int argc, char** argv )
const std::string procKey = "0a ProcInit";
timermap.start( procKey );

-// Create a process object
+// Create a process object, read param card and set parameters
+// FIXME: the process instance can happily go out of scope because it is only needed to read parameters?
+// FIXME: the CPPProcess should really be a singleton? (for instance, in bridge mode this will be called twice here?)
CPPProcess process( verbose );
-
-// Read param_card and set parameters
process.initProc( "../../Cards/param_card.dat" );
const fptype energy = 1500; // historical default, Ecms = 1500 GeV = 1.5 TeV (above the Z peak)
//const fptype energy = 91.2; // Ecms = 91.2 GeV (Z peak)
@@ -49,11 +49,7 @@ extern "C"
#ifdef MGONGPUCPP_GPUIMPL
GpuRuntime::setUp();
#endif
-// Create a process object, read parm card and set parameters
-// FIXME: the process instance can happily go out of scope because it is only needed to read parameters?
-// FIXME: the CPPProcess should really be a singleton? what if fbridgecreate is called from several Fortran threads?
-CPPProcess process( /*verbose=*/false );
-process.initProc( "../../Cards/param_card.dat" );
+// (NB: CPPProcess::initProc no longer needs to be executed here because it is called in the Bridge constructor)
// FIXME: disable OMP in Bridge when called from Fortran
*ppbridge = new Bridge<FORTRANFPTYPE>( *pnevtF, *pnparF, *pnp4F );
}
@@ -251,12 +251,12 @@ namespace mg5amcCpu
fptype* allDenominators, // output: multichannel denominators[nevt], running_sum_over_helicities
#endif
bool* isGoodHel ) // output: isGoodHel[ncomb] - device array (CUDA implementation)
-{ /* clang-format on */
-fptype allMEsLast = 0;
+{ /* clang-format on */
const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) in grid
-allMEs[ievt] = 0;
for( int ihel = 0; ihel < ncomb; ihel++ )
{
+// NEW IMPLEMENTATION OF GETGOODHEL (#630): RESET THE RUNNING SUM OVER HELICITIES TO 0 BEFORE ADDING A NEW HELICITY
+allMEs[ievt] = 0;
// NB: calculate_wavefunctions ADDS |M|^2 for a given ihel to the running sum of |M|^2 over helicities for the given event(s)
constexpr fptype_sv* jamp2_sv = nullptr; // no need for color selection during helicity filtering
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -265,12 +265,11 @@
#else
calculate_wavefunctions( ihel, allmomenta, allcouplings, allMEs, jamp2_sv );
#endif
-if( allMEs[ievt] != allMEsLast )
+if( allMEs[ievt] != 0 ) // NEW IMPLEMENTATION OF GETGOODHEL (#630): COMPARE EACH HELICITY CONTRIBUTION TO 0
{
//if ( !isGoodHel[ihel] ) std::cout << "sigmaKin_getGoodHel ihel=" << ihel << " TRUE" << std::endl;
isGoodHel[ihel] = true;
}
-allMEsLast = allMEs[ievt]; // running sum up to helicity ihel for event ievt
}
}
#else
@@ -289,19 +288,11 @@ namespace mg5amcCpu
//assert( (size_t)(allMEs) %% mgOnGpu::cppAlign == 0 ); // SANITY CHECK: require SIMD-friendly alignment [COMMENT OUT TO TEST MISALIGNED ACCESS]
// Allocate arrays at build time to contain at least 16 events (or at least neppV events if neppV>16, e.g. in future VPUs)
constexpr int maxtry0 = std::max( 16, neppV ); // 16, but at least neppV (otherwise the npagV loop does not even start)
-fptype allMEsLast[maxtry0] = { 0 }; // allocated at build time: maxtry0 must be a constexpr
// Loop over only nevt events if nevt is < 16 (note that nevt is always >= neppV)
assert( nevt >= neppV );
const int maxtry = std::min( maxtry0, nevt ); // 16, but at most nevt (avoid invalid memory access if nevt<maxtry0)

-// PART 0 - INITIALISATION (before calculate_wavefunctions)
-// Reset the "matrix elements" - running sums of |M|^2 over helicities for the given event
-for( int ievt = 0; ievt < maxtry; ++ievt )
-{
-allMEs[ievt] = 0; // all zeros
-}
-
-// PART 1 - HELICITY LOOP: CALCULATE WAVEFUNCTIONS
+// HELICITY LOOP: CALCULATE WAVEFUNCTIONS
const int npagV = maxtry / neppV;
#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT
// Mixed fptypes #537: float for color algebra and double elsewhere
@@ -320,6 +311,16 @@
#endif
for( int ihel = 0; ihel < ncomb; ihel++ )
{
+// NEW IMPLEMENTATION OF GETGOODHEL (#630): RESET THE RUNNING SUM OVER HELICITIES TO 0 BEFORE ADDING A NEW HELICITY
+for( int ieppV = 0; ieppV < neppV; ++ieppV )
+{
+const int ievt = ievt00 + ieppV;
+allMEs[ievt] = 0;
+#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT
+const int ievt2 = ievt00 + ieppV + neppV;
+allMEs[ievt2] = 0;
+#endif
+}
constexpr fptype_sv* jamp2_sv = nullptr; // no need for color selection during helicity filtering
//std::cout << "sigmaKin_getGoodHel ihel=" << ihel << ( isGoodHel[ihel] ? " true" : " false" ) << std::endl;
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
@@ -331,22 +332,18 @@
for( int ieppV = 0; ieppV < neppV; ++ieppV )
{
const int ievt = ievt00 + ieppV;
-const bool differs = ( allMEs[ievt] != allMEsLast[ievt] );
-if( differs )
+if( allMEs[ievt] != 0 ) // NEW IMPLEMENTATION OF GETGOODHEL (#630): COMPARE EACH HELICITY CONTRIBUTION TO 0
{
//if ( !isGoodHel[ihel] ) std::cout << "sigmaKin_getGoodHel ihel=" << ihel << " TRUE" << std::endl;
isGoodHel[ihel] = true;
}
-allMEsLast[ievt] = allMEs[ievt]; // running sum up to helicity ihel
#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT
const int ievt2 = ievt00 + ieppV + neppV;
-const bool differs2 = ( allMEs[ievt2] != allMEsLast[ievt2] );
-if( differs2 )
+if( allMEs[ievt2] != 0 ) // NEW IMPLEMENTATION OF GETGOODHEL (#630): COMPARE EACH HELICITY CONTRIBUTION TO 0
{
//if ( !isGoodHel[ihel] ) std::cout << "sigmaKin_getGoodHel ihel=" << ihel << " TRUE" << std::endl;
isGoodHel[ihel] = true;
}
-allMEsLast[ievt2] = allMEs[ievt2]; // running sum up to helicity ihel
#endif
}
}
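Note: in essence, #630 replaces the old "did the running sum change?" test (via the allMEsLast bookkeeping removed above) with "is this helicity's own contribution nonzero?", by zeroing the accumulator before each helicity. A minimal sketch of the idea, where a toy contrib() stands in for calculate_wavefunctions (which ADDS |M|^2 for one helicity to the running sum); ncomb and the contribution values here are illustrative assumptions:

    #include <array>
    #include <cstdio>

    constexpr int ncomb = 4; // toy number of helicity combinations

    // Stand-in for calculate_wavefunctions: ADD the |M|^2 contribution of
    // helicity ihel to the running sum 'me' (zero for odd ihel in this toy).
    void contrib( int ihel, double& me ) { me += ( ihel % 2 == 0 ? 1.5 : 0. ); }

    int main()
    {
      std::array<bool, ncomb> isGoodHel{}; // all false initially
      double me = 0;
      for( int ihel = 0; ihel < ncomb; ihel++ )
      {
        me = 0; // NEW (#630): reset the running sum before each helicity...
        contrib( ihel, me );
        if( me != 0 ) isGoodHel[ihel] = true; // ...and compare its contribution to 0
      }
      for( int ihel = 0; ihel < ncomb; ihel++ )
        std::printf( "ihel=%d isGoodHel=%d\n", ihel, isGoodHel[ihel] ? 1 : 0 );
      return 0;
    }

This drops the per-event allMEsLast arrays and the PART 0 initialisation loop, since each helicity's contribution is now inspected directly.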
@@ -71,6 +71,8 @@ struct CPUTest : public CUDA_CPU_TestBase
, hstSelCol( nevt )
, hstIsGoodHel( CPPProcess::ncomb )
{
+// FIXME: the process instance can happily go out of scope because it is only needed to read parameters?
+// FIXME: the CPPProcess should really be a singleton?
process.initProc( "../../Cards/param_card.dat" );
}

@@ -183,6 +185,8 @@ struct CUDATest : public CUDA_CPU_TestBase
, devSelCol( nevt )
, devIsGoodHel( CPPProcess::ncomb )
{
+// FIXME: the process instance can happily go out of scope because it is only needed to read parameters?
+// FIXME: the CPPProcess should really be a singleton?
process.initProc( "../../Cards/param_card.dat" );
}

@@ -1040,6 +1040,7 @@ def __init__(self, *args, **kwargs):
for kwarg in kwargs: misc.sprint( 'kwargs[%s] = %s' %( kwarg, kwargs[kwarg] ) )
super().__init__(*args, **kwargs)
self.process_class = 'CPPProcess'
+###if self.in_madevent_mode: proc_id = kwargs['prefix']+1 # madevent+cudacpp (NB: HERE SELF.IN_MADEVENT_MODE DOES NOT WORK!)
if 'prefix' in kwargs: proc_id = kwargs['prefix']+1 # madevent+cudacpp (ime+1 from ProcessExporterFortranMEGroup.generate_subprocess_directory)
else: proc_id = 0 # standalone_cudacpp
misc.sprint(proc_id)
@@ -1145,7 +1146,7 @@ def get_sigmaKin_lines(self, color_amplitudes, write=True):
misc.sprint(self.support_multichannel)
replace_dict = super().get_sigmaKin_lines(color_amplitudes, write=False)
replace_dict['proc_id'] = self.proc_id if self.proc_id>0 else 1
-replace_dict['proc_id_source'] = 'madevent + cudacpp exporter' if self.proc_id>0 else 'standalone_cudacpp'
+replace_dict['proc_id_source'] = 'madevent + cudacpp exporter' if self.proc_id>0 else 'standalone_cudacpp' # FIXME? use self.in_madevent_mode instead?
if write:
file = self.read_template_file(self.process_sigmaKin_function_template) % replace_dict
file = '\n'.join( file.split('\n')[8:] ) # skip first 8 lines in process_sigmaKin_function.inc (copyright)
@@ -1296,7 +1297,7 @@ def generate_process_files(self):
"""Generate mgOnGpuConfig.h, CPPProcess.cc, CPPProcess.h, check_sa.cc, gXXX.cu links"""
misc.sprint('Entering PLUGIN_OneProcessExporter.generate_process_files')
if self.include_multi_channel:
-misc.sprint('self.include_multi_channel is already defined: this is madevent+second_exporter mode')
+misc.sprint('self.include_multi_channel is already defined: this is madevent+second_exporter mode') # FIXME? use self.in_madevent_mode instead?
else:
misc.sprint('self.include_multi_channel is not yet defined: this is standalone_cudacpp mode') # see issue #473
if self.matrix_elements[0].get('has_mirror_process'):
20 changes: 18 additions & 2 deletions epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py
@@ -1,7 +1,7 @@
# Copyright (C) 2020-2023 CERN and UCLouvain.
# Licensed under the GNU Lesser General Public License (version 3 or later).
# Created by: O. Mattelaer (Sep 2021) for the MG5aMC CUDACPP plugin.
-# Further modified by: O. Mattelaer, A. Valassi, J. Teig, Z. Wettersten (2021-2023) for the MG5aMC CUDACPP plugin.
+# Further modified by: O. Mattelaer, S. Roiser, A. Valassi, J. Teig, Z. Wettersten (2021-2023) for the MG5aMC CUDACPP plugin.

import os

@@ -149,6 +149,7 @@ class PLUGIN_ProcessExporter(PLUGIN_export_cpp.ProcessExporterGPU):

# AV (default from OM's tutorial) - add a debug printout
def __init__(self, *args, **kwargs):
+self.in_madevent_mode = False # see MR #747
misc.sprint('Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter)')
return super().__init__(*args, **kwargs)

@@ -200,7 +201,14 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag):
cmdhistory is the list of command used so far.
MG5options are all the options of the main interface
outputflags is a list of options provided when doing the output command"""
-misc.sprint('Entering PLUGIN_ProcessExporter.finalize')
+misc.sprint('Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode)
+if self.in_madevent_mode:
+self.add_input_for_banner()
+if 'CUDACPP_CODEGEN_PATCHLEVEL' in os.environ: patchlevel = os.environ['CUDACPP_CODEGEN_PATCHLEVEL']
+else: patchlevel = ''
+path = os.path.realpath(os.curdir + os.sep + 'PLUGIN' + os.sep + 'CUDACPP_SA_OUTPUT')
+if os.system(path + os.sep + 'patchMad.sh ' + self.dir_path + ' PROD ' + patchlevel) != 0:
+raise Exception('ERROR! the O/S call to patchMad.sh failed')
return super().finalize(matrix_element, cmdhistory, MG5options, outputflag)

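Note: the optional CUDACPP_CODEGEN_PATCHLEVEL environment variable is forwarded verbatim as the last argument of patchMad.sh, so exporting it as --nopatch or --upstream selects the corresponding patchlevel in the script (see the patchMad.sh changes below).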
# AV (default from OM's tutorial) - overload settings and add a debug printout
@@ -213,4 +221,12 @@ def modify_grouping(self, matrix_element):
misc.sprint('Entering PLUGIN_ProcessExporter.modify_grouping')
return False, matrix_element

+# OM additional fixes for madevent+cudacpp mode
+def add_input_for_banner(self):
+# Note: this is only called in madevent mode (self.in_madevent_mode = True)
+new_parameters = ["{'name':'cudacpp_backend', 'value':'CPP', 'include':False, 'hidden':False}"]
+finput = open(pjoin(self.dir_path, 'bin', 'internal', 'plugin_run_card'), 'w')
+for entry in new_parameters:
+finput.write(entry)
+
#------------------------------------------------------------------------------------
@@ -4,13 +4,16 @@
# Created by: A. Valassi (Mar 2022) for the MG5aMC CUDACPP plugin.
# Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin.

+set -e # immediate exit on error
+
status=0

scrdir=$(cd $(dirname $0); pwd)

function usage()
{
echo "Usage: $0 <process.[madonly|mad]> <vecsize> <patch_dir> [--nopatch|--upstream]"
echo "ERROR! Unknown command '$0 $*'"
echo "Usage: $0 <process_dir> <patch_dir> [--nopatch|--upstream]"
exit 1
}

@@ -19,22 +22,19 @@ function usage()
###patchlevel=1 # [--nopatch] modify upstream MG5AMC but do not apply patch commands (reference to prepare new patches)
patchlevel=2 # [DEFAULT] complete generation of cudacpp .sa/.mad (copy templates and apply patch commands)

if [ "${1%.madonly}" == "$1" ] && [ "${1%.mad}" == "$1" ]; then
usage
elif [ "$3" == "" ]; then
usage
elif [ "$4" == "--nopatch" ]; then
if [ "$5" != "" ]; then usage; fi
if [ "$2" == "" ]; then
usage $*
elif [ "$3" == "--nopatch" ]; then
if [ "$4" != "" ]; then usage; fi
patchlevel=1
elif [ "$4" == "--upstream" ]; then
if [ "$5" != "" ]; then usage; fi
elif [ "$3" == "--upstream" ]; then
if [ "$4" != "" ]; then usage; fi
patchlevel=0
elif [ "$4" != "" ]; then
usage
elif [ "$3" != "" ]; then
usage $*
fi
dir=$1
-vecsize=$2
-dir_patches=$3
+dir_patches=$2
###echo "Current dir: $pwd"
###echo "Input dir to patch: $dir"

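Note: with the vecsize argument dropped, a madevent-format process directory is now patched via, for instance, 'patchMad.sh gg_tt.mad PROD' rather than the previous 'patchMad.sh gg_tt.mad 16384 PROD' (gg_tt.mad and the 16384 vecsize are illustrative values taken from elsewhere in this diff).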
@@ -67,8 +67,9 @@ cat ${dir}/Source/make_opts >> ${dir}/Source/make_opts.new
# Patch the default Fortran code to provide the integration with the cudacpp plugin
# (1) Process-independent patches
touch ${dir}/Events/.keep # this file should already be present (mg5amcnlo copies it from Template/LO/Events/.keep)
+\cp -dpr ${scrdir}/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/.clang-format ${dir} # new file
\cp -dpr ${scrdir}/MG5aMC_patches/${dir_patches}/fbridge_common.inc ${dir}/SubProcesses # new file
-sed -i 's/2 = sde_strategy/1 = sde_strategy/' ${dir}/Cards/run_card.dat # use strategy SDE=1 in multichannel mode (see #419)
+sed -i 's/SDE_STRAT = 2/SDE_STRAT = 1/' ${dir}/Source/run_card.inc # use strategy SDE=1 in multichannel mode (see #419)
if [ "${patchlevel}" == "2" ]; then
cd ${dir}
sed -i 's/DEFAULT_F2PY_COMPILER=f2py3.*/DEFAULT_F2PY_COMPILER=f2py3/' Source/make_opts
@@ -93,9 +94,6 @@ for p1dir in ${dir}/SubProcesses/P*; do
ln -sf ../fbridge_common.inc . # new file
\cp -dpr ${scrdir}/MG5aMC_patches/${dir_patches}/counters.cc . # new file
\cp -dpr ${scrdir}/MG5aMC_patches/${dir_patches}/ompnumthreads.cc . # new file
if [ "${dir%.mad}" == "$1" ]; then
\cp -dpr ${scrdir}/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/timer.h . # new file, already present via cudacpp in *.mad
fi
if [ "${patchlevel}" == "2" ]; then
echo "DEBUG: cd ${PWD}; patch -p6 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.P1"
if ! patch -p6 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.P1; then status=1; fi
[Diff truncated by the page: the remaining changed files among the 284 are not shown.]