Merge branch 'cmsdy' (with madgraph5#968 and madgraph5#969 improvements - but not yet the latest upstream/master) into cmsdyps

Fix conflicts in patch.P1 and patch.common (NB: the 968/969 improvements are now in the OLD sample_get_x)
valassi · Aug 22, 2024 · b747b6c · b747b6c
2 parents b7e11e2 + 348664c
commit b747b6c
Show file tree

Hide file tree

Showing 15 changed files with 8,340 additions and 9,399 deletions.
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
@@ -1,5 +1,5 @@
 diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f
-index 7bff4b945..0c5869973 100644
+index 7bff4b9455..0c58699731 100644
 --- b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f
 +++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f
 @@ -312,8 +312,10 @@ C      entries to the grid for the MC over helicity configuration
@@ -14,7 +14,7 @@ index 7bff4b945..0c5869973 100644
        IF(GROUPED_MC_GRID_STATUS.EQ.0) THEN
  C       If we were in the initialization phase of the grid for MC over
 diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f
-index 4fbb8e6ba..77aff307b 100644
+index 4fbb8e6ba7..77aff307b8 100644
 --- b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f
 +++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f
 @@ -125,6 +125,7 @@ C     Continue only if IMODE is 0, 4 or 5
@@ -241,7 +241,7 @@ index 4fbb8e6ba..77aff307b 100644
        END
 
 diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f
-index 1124a9164..f205ce6fd 100644
+index 1124a9164a..f205ce6fd9 100644
 --- b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f
 +++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f
 @@ -74,16 +74,95 @@ c      common/to_colstats/ncols,ncolflow,ncolalt,ic
@@ -396,7 +396,7 @@ index 1124a9164..f205ce6fd 100644
        open(unit=lun,file=tempname,status='old',ERR=20)
        fopened=.true.
 diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f
-index e73e654d4..3072054f2 100644
+index e73e654d49..3072054f2d 100644
 --- b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f
 +++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f
 @@ -72,7 +72,10 @@ C

diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common
@@ -76,8 +76,124 @@ index e18ba7c03..d038f9159 100644
  c************************************************************************
  c     Returns maxdim random numbers between 0 and 1, and the wgt
  c     associated with this set of points, and the iteration number
+@@ -1240,7 +1250,26 @@ c
+ c     Local
+ c
+       integer  im, ip,ij,icount,it_warned
+-      double precision xbin_min,xbin_max,ddum(maxdim),xo,y
++      double precision xbin_min,xbin_max,ddum(maxdim),xo,y 
++c
++c     Local (performance optimization #969)
++c
++      integer xbinarraydim
++      parameter (xbinarraydim=maxdim*lmaxconfigs)
++      double precision xbin_min0_array(maxdim, lmaxconfigs)
++      double precision xbin_max1_array(maxdim, lmaxconfigs)
++      logical xbin_min0_saved(maxdim, lmaxconfigs)
++      logical xbin_max1_saved(maxdim, lmaxconfigs)
++      save xbin_min0_array, xbin_max1_array
++      save xbin_min0_saved, xbin_max1_saved
++      data xbin_min0_saved/xbinarraydim*.false./
++      data xbin_max1_saved/xbinarraydim*.false./
++
++      character*255 env_name, env_value
++      integer env_length, env_status
++      logical first, skipxbinchecks
++      data first, skipxbinchecks/.true., .false./
++      save first, skipxbinchecks
+ c
+ c     External
+ c
+@@ -1291,15 +1320,29 @@ c         write(*,'(7f11.5)')(ddum(j)*real(ng),j=1,dim)
+       endif
+       if (ituple .eq. 1) then
+ c         write(*,*) 'Getting variable',ipole,j,minvar(j,ipole)
+-         xbin_min = xbin(xmin,minvar(j,ipole))
+-         xbin_max = xbin(xmax,minvar(j,ipole))
+-         if (xbin_min .gt. xbin_max-1) then
+-c            write(*,'(a,4e15.4)') 'Bad limits',xbin_min,xbin_max,
+-c     &           xmin,xmax
+-c            xbin_max=xbin_min+1d-10
+-            xbin_max = xbin(xmax,minvar(j,ipole))
+-            xbin_min = min(xbin(xmin,minvar(j,ipole)), xbin_max)
+-         endif
++
++        if(xmax.ne.1 .or. .not.xbin_max1_saved(j,ipole)) then
++          xbin_max = xbin(xmax, minvar(j,ipole))
++          if(xmax.eq.1) then
++            xbin_max1_array(j,ipole) = xbin_max
++            xbin_max1_saved(j,ipole) = .true.
++          endif
++        else
++          xbin_max = xbin_max1_array(j,ipole)
++        endif
++
++        if(xmin.ne.0 .or. .not.xbin_min0_saved(j,ipole)) then
++          xbin_min = xbin(xmin, minvar(j,ipole))
++          if (xbin_min .gt. xbin_max-1) then
++            xbin_min = min(xbin_min, xbin_max)
++          endif
++          if(xmin.eq.0) then
++            xbin_min0_array(j,ipole) = xbin_min
++            xbin_min0_saved(j,ipole) = .true.
++          endif
++        else
++          xbin_min = xbin_min0_array(j,ipole)
++        endif
+ c
+ c     Line which allows us to keep choosing same x
+ c
+@@ -1312,10 +1355,10 @@ c            write(*,*) 'Reusing num',j,nzoom,tx(2,j)
+             call ntuple(ddum(j),max(xbin_min,dble(int(tx(2,j)))),
+      $           min(xbin_max,dble(int(tx(2,j))+1)),j,ipole)
+
+-            if(max(xbin_min,dble(int(tx(2,j)))).gt.
+-     $           min(xbin_max,dble(int(tx(2,j))+1))) then
++c           if(max(xbin_min,dble(int(tx(2,j)))).gt.
++c    $           min(xbin_max,dble(int(tx(2,j))+1))) then
+ c               write(*,*) 'not good'
+-            endif
++c           endif
+
+ c            write(*,'(2i6,4e15.5)') nzoom,j,ddum(j),tx(2,j),
+ c     $           max(xbin_min,dble(int(tx(2,j)))),
+@@ -1378,7 +1421,19 @@ c     to the fact that the grids are required to be separated by 1e-14. Since
+ c     double precision is about 18 digits, we expect things to agree to
+ c     3 digit accuracy.
+ c
+-      if (abs(ddum(j)-xbin(x,ij))/(ddum(j)+1d-22) .gt. 1e-3) then
++      if (first) then
++        env_name = 'CUDACPP_RUNTIME_SKIPXBINCHECKS'
++        call get_environment_variable(env_name, env_value, env_length, env_status)
++        if( env_status.eq.0 ) then
++          skipxbinchecks = .true.
++        endif
++      endif
++
++      if (skipxbinchecks) then
++        if (first) then
++          write(6,*) 'WARNING: skipping xbin checks (CUDACPP_RUNTIME_SKIPXBINCHECKS is set)'
++        endif
++      else if (abs(ddum(j)-xbin(x,ij))/(ddum(j)+1d-22) .gt. 1e-3) then
+          if (icount .lt. 5) then
+             write(*,'(a,i4,2e14.6,1e12.4)')
+      &           'Warning xbin not returning correct x', ij,
+@@ -1389,10 +1444,11 @@ c
+          endif
+          icount=icount+1
+       endif
+-      if (x .lt. xmin .or. x .gt. xmax) then
++      first = .false.
++c     if (x .lt. xmin .or. x .gt. xmax) then
+ c         write(*,'(a,4i4,2f24.16,1e10.2)') 'Bad x',ij,int(xbin_min),ip,
+ c     &        int(xbin_max),xmin,x,xmax-xmin
+-      endif
++c     endif
+
+       wgt = wgt * xo * dble(xbin_max-xbin_min)
+ c      print*,'Returning x',ij,ipole,j,x
 diff --git b/epochX/cudacpp/gg_tt.mad/Source/genps.inc a/epochX/cudacpp/gg_tt.mad/Source/genps.inc
-index a59181c70..af7e0efbc 100644
+index a59181c708..af7e0efbce 100644
 --- b/epochX/cudacpp/gg_tt.mad/Source/genps.inc
 +++ a/epochX/cudacpp/gg_tt.mad/Source/genps.inc
 @@ -30,7 +30,8 @@ c*************************************************************************

diff --git a/epochX/cudacpp/gg_tt.mad/Source/dsample.f b/epochX/cudacpp/gg_tt.mad/Source/dsample.f
@@ -1250,7 +1250,26 @@ subroutine sample_get_x_old(wgt, x, j, ipole, xmin, xmax)
 c     Local
 c
       integer  im, ip,ij,icount,it_warned
-      double precision xbin_min,xbin_max,ddum(maxdim),xo,y
+      double precision xbin_min,xbin_max,ddum(maxdim),xo,y 
+c
+c     Local (performance optimization #969)
+c
+      integer xbinarraydim
+      parameter (xbinarraydim=maxdim*lmaxconfigs)
+      double precision xbin_min0_array(maxdim, lmaxconfigs)
+      double precision xbin_max1_array(maxdim, lmaxconfigs)
+      logical xbin_min0_saved(maxdim, lmaxconfigs)
+      logical xbin_max1_saved(maxdim, lmaxconfigs)
+      save xbin_min0_array, xbin_max1_array
+      save xbin_min0_saved, xbin_max1_saved
+      data xbin_min0_saved/xbinarraydim*.false./
+      data xbin_max1_saved/xbinarraydim*.false./
+
+      character*255 env_name, env_value
+      integer env_length, env_status
+      logical first, skipxbinchecks
+      data first, skipxbinchecks/.true., .false./
+      save first, skipxbinchecks
 c
 c     External
 c
@@ -1301,15 +1320,29 @@ subroutine sample_get_x_old(wgt, x, j, ipole, xmin, xmax)
       endif
       if (ituple .eq. 1) then
 c         write(*,*) 'Getting variable',ipole,j,minvar(j,ipole)
-         xbin_min = xbin(xmin,minvar(j,ipole))
-         xbin_max = xbin(xmax,minvar(j,ipole))
-         if (xbin_min .gt. xbin_max-1) then
-c            write(*,'(a,4e15.4)') 'Bad limits',xbin_min,xbin_max,
-c     &           xmin,xmax
-c            xbin_max=xbin_min+1d-10
-            xbin_max = xbin(xmax,minvar(j,ipole))
-            xbin_min = min(xbin(xmin,minvar(j,ipole)), xbin_max)
-         endif
+
+        if(xmax.ne.1 .or. .not.xbin_max1_saved(j,ipole)) then
+          xbin_max = xbin(xmax, minvar(j,ipole))
+          if(xmax.eq.1) then
+            xbin_max1_array(j,ipole) = xbin_max
+            xbin_max1_saved(j,ipole) = .true.
+          endif
+        else
+          xbin_max = xbin_max1_array(j,ipole)
+        endif
+
+        if(xmin.ne.0 .or. .not.xbin_min0_saved(j,ipole)) then
+          xbin_min = xbin(xmin, minvar(j,ipole))
+          if (xbin_min .gt. xbin_max-1) then
+            xbin_min = min(xbin_min, xbin_max)
+          endif
+          if(xmin.eq.0) then
+            xbin_min0_array(j,ipole) = xbin_min
+            xbin_min0_saved(j,ipole) = .true.
+          endif
+        else
+          xbin_min = xbin_min0_array(j,ipole)
+        endif
 c
 c     Line which allows us to keep choosing same x
 c
@@ -1322,10 +1355,10 @@ subroutine sample_get_x_old(wgt, x, j, ipole, xmin, xmax)
             call ntuple(ddum(j),max(xbin_min,dble(int(tx(2,j)))),
      $           min(xbin_max,dble(int(tx(2,j))+1)),j,ipole)
 
-            if(max(xbin_min,dble(int(tx(2,j)))).gt.
-     $           min(xbin_max,dble(int(tx(2,j))+1))) then
+c           if(max(xbin_min,dble(int(tx(2,j)))).gt.
+c    $           min(xbin_max,dble(int(tx(2,j))+1))) then
 c               write(*,*) 'not good'
-            endif
+c           endif
 
 c            write(*,'(2i6,4e15.5)') nzoom,j,ddum(j),tx(2,j),
 c     $           max(xbin_min,dble(int(tx(2,j)))),
@@ -1388,7 +1421,19 @@ subroutine sample_get_x_old(wgt, x, j, ipole, xmin, xmax)
 c     double precision is about 18 digits, we expect things to agree to
 c     3 digit accuracy.
 c
-      if (abs(ddum(j)-xbin(x,ij))/(ddum(j)+1d-22) .gt. 1e-3) then
+      if (first) then
+        env_name = 'CUDACPP_RUNTIME_SKIPXBINCHECKS'
+        call get_environment_variable(env_name, env_value, env_length, env_status)
+        if( env_status.eq.0 ) then
+          skipxbinchecks = .true.
+        endif
+      endif
+
+      if (skipxbinchecks) then
+        if (first) then
+          write(6,*) 'WARNING: skipping xbin checks (CUDACPP_RUNTIME_SKIPXBINCHECKS is set)'
+        endif
+      else if (abs(ddum(j)-xbin(x,ij))/(ddum(j)+1d-22) .gt. 1e-3) then
          if (icount .lt. 5) then
             write(*,'(a,i4,2e14.6,1e12.4)')
      &           'Warning xbin not returning correct x', ij,
@@ -1399,10 +1444,11 @@ subroutine sample_get_x_old(wgt, x, j, ipole, xmin, xmax)
          endif
          icount=icount+1
       endif
-      if (x .lt. xmin .or. x .gt. xmax) then
+      first = .false.
+c     if (x .lt. xmin .or. x .gt. xmax) then
 c         write(*,'(a,4i4,2f24.16,1e10.2)') 'Bad x',ij,int(xbin_min),ip,
 c     &        int(xbin_max),xmin,x,xmax-xmin
-      endif
+c     endif
 
       wgt = wgt * xo * dble(xbin_max-xbin_min)
 c      print*,'Returning x',ij,ipole,j,x

diff --git a/epochX/cudacpp/pp_dy3j.mad/Cards/me5_configuration.txt b/epochX/cudacpp/pp_dy3j.mad/Cards/me5_configuration.txt
@@ -234,7 +234,7 @@
 # pineappl = pineappl
 
 
-#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo 
+#mg5_path = /data/avalassi/GPU2024/madgraph4gpuX/MG5aMC/mg5amcnlo 
 
 # MG5 MAIN DIRECTORY
-#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo
+#mg5_path = /data/avalassi/GPU2024/madgraph4gpuX/MG5aMC/mg5amcnlo