Skip to content

Commit

Permalink
Merge pull request #780 from valassi/oct23av
Browse files Browse the repository at this point in the history
Patches and test results over the latest PRs
  • Loading branch information
valassi authored Nov 3, 2023
2 parents 00dbc53 + f53166d commit 63ddd4d
Show file tree
Hide file tree
Showing 591 changed files with 34,691 additions and 28,177 deletions.
20 changes: 10 additions & 10 deletions .github/workflows/c-cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,40 +15,40 @@ jobs:
fail-fast: false
steps:
- uses: actions/checkout@v2
- name: make epoch1
- name: make debug
run: make -C ${{ matrix.folder }} debug
CPU:
runs-on: ubuntu-latest
strategy:
matrix:
folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum , epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg ]
folder: [ epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum , epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg ]
precision: [ d , f , m ]
fail-fast: false
steps:
- uses: actions/checkout@v2
- name: make info
run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} info
run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk info
- name: make
run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }}
- name: make check
run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} check
run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk check
CPU_MAC:
runs-on: macos-latest
env:
FC: gfortran-11
strategy:
matrix:
folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum, epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg ]
folder: [ epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum, epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg ]
precision: [ d , f , m ]
fail-fast: false
steps:
- uses: actions/checkout@v2
- name: make info
run: make AVX=none OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} info
run: make AVX=none OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk info
- name: make
run: make AVX=none OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }}
- name: make check
run: make AVX=none OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} check
run: make AVX=none OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk check
GPU:
runs-on: self-hosted
env:
Expand All @@ -57,16 +57,16 @@ jobs:
REQUIRE_CUDA: 1
strategy:
matrix:
folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum , epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg ]
folder: [ epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum , epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg ]
precision: [ d , f , m ]
fail-fast: false
steps:
- uses: actions/checkout@v2
- name: path
run: echo "PATH=$PATH"
- name: make info
run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} info
run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk info
- name: make
run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }}
- name: make check
run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} check
run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk check
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (C) 2020-2023 CERN and UCLouvain.
// Licensed under the GNU Lesser General Public License (version 3 or later).
// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin.
// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin.
// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin.

#include "timer.h"
#define TIMERTYPE std::chrono::high_resolution_clock
Expand Down Expand Up @@ -40,7 +40,6 @@ extern "C"
static float smatrix1_totaltime = 0;
static mgOnGpu::Timer<TIMERTYPE> smatrix1multi_timer[nimplC];
static float smatrix1multi_totaltime[nimplC] = { 0 };
static int matrix1_counter = 0;
static int smatrix1_counter = 0;
static int smatrix1multi_counter[nimplC] = { 0 };

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f
index 27ed1439e..3b24a9924 100644
index 880769442..5a3da931f 100644
--- b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f
+++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f
@@ -469,23 +469,140 @@ C
@@ -484,23 +484,140 @@ C
INTEGER VECSIZE_USED

INTEGER IVEC
Expand Down Expand Up @@ -284,7 +284,7 @@ index 71fbf2b25..0f1d199fc 100644
open(unit=lun,file=tempname,status='old',ERR=20)
fopened=.true.
diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f
index 3ac962688..ef18aff22 100644
index 3ac962688..daea73a6d 100644
--- b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f
+++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f
@@ -72,7 +72,10 @@ C
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ index a59181c70..af7e0efbc 100644
PARAMETER(MAXTRIES=25)
C To pass the helicity configuration chosen by the DiscreteSampler to
diff --git b/epochX/cudacpp/gg_tt.mad/Source/makefile a/epochX/cudacpp/gg_tt.mad/Source/makefile
index 617f10b93..dbe08b846 100644
index 617f10b93..00c73099a 100644
--- b/epochX/cudacpp/gg_tt.mad/Source/makefile
+++ a/epochX/cudacpp/gg_tt.mad/Source/makefile
@@ -120,7 +120,7 @@ $(LIBDIR)libiregi.a: $(IREGIDIR)
Expand All @@ -37,12 +37,11 @@ index 617f10b93..dbe08b846 100644
+ for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done;
+cleanall: cleanSource # THIS IS THE ONE
+ for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done;
+
diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile
index 348c283be..74db44d84 100644
index 348c283be..65369d610 100644
--- b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile
+++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile
@@ -1,6 +1,22 @@
@@ -1,6 +1,28 @@
+SHELL := /bin/bash
+
include ../../Source/make_opts
Expand All @@ -54,6 +53,12 @@ index 348c283be..74db44d84 100644
+# Compile counters with -O3 as in the cudacpp makefile (avoid being "unfair" to Fortran #740)
+CXXFLAGS = -O3 -Wall -Wshadow -Wextra
+
+# Add -std=c++17 explicitly to avoid build errors on macOS
+# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked"
+ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),)
+CXXFLAGS += -std=c++17 -mmacosx-version-min=11.3
+endif
+
+# Enable ccache if USECCACHE=1
+ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1)
+ override CXX:=ccache $(CXX)
Expand All @@ -65,7 +70,7 @@ index 348c283be..74db44d84 100644
# Load additional dependencies of the bias module, if present
ifeq (,$(wildcard ../bias_dependencies))
BIASDEPENDENCIES =
@@ -24,7 +40,26 @@ else
@@ -24,7 +46,26 @@ else
MADLOOP_LIB =
endif

Expand All @@ -81,19 +86,19 @@ index 348c283be..74db44d84 100644
+CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)")
+###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV))
+###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))"))
+CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>/dev/null | awk '/Building/{print $$3}' | sed s/BUILDDIR=//)
+#ifeq ($(CUDACPP_BUILDDIR),)
+#$(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!)
+#else
+CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//)
+ifeq ($(CUDACPP_BUILDDIR),)
+$(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!)
+else
+$(info CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)')
+#endif
+endif
+CUDACPP_COMMONLIB=mg5amc_common
+CUDACPP_CXXLIB=mg5amc_$(processid_short)_cpp
+CUDACPP_CULIB=mg5amc_$(processid_short)_cuda

LIBS = $(LIBDIR)libbias.$(libext) $(LIBDIR)libdhelas.$(libext) $(LIBDIR)libdsample.$(libext) $(LIBDIR)libgeneric.$(libext) $(LIBDIR)libpdf.$(libext) $(LIBDIR)libgammaUPC.$(libext) $(LIBDIR)libmodel.$(libext) $(LIBDIR)libcernlib.$(libext) $(MADLOOP_LIB) $(LOOP_LIBS)

@@ -43,41 +78,112 @@ ifeq ($(strip $(MATRIX_HEL)),)
@@ -43,41 +84,117 @@ ifeq ($(strip $(MATRIX_HEL)),)
endif


Expand All @@ -113,7 +118,12 @@ index 348c283be..74db44d84 100644

-$(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX)
- $(FC) -o $(PROG) $(PROCESS) $(MATRIX) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp
+#LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (flag not universal -> skip?)
+ifeq ($(UNAME),Darwin)
+LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so)
+LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked"
+else
+LDFLAGS += -Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (not supported on macOS)
+endif

-$(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL)
- $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp
Expand All @@ -128,8 +138,8 @@ index 348c283be..74db44d84 100644
+else ifneq ($(shell $(CXX) --version | egrep '^clang'),)
+override OMPFLAGS = -fopenmp
+$(CUDACPP_BUILDDIR)/$(PROG)_cpp: LINKLIBS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604
+###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),)
+###override OMPFLAGS = -fopenmp # OMP is not supported yet by cudacpp for Apple clang
+else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),)
+override OMPFLAGS = # OMP is not supported yet by cudacpp for Apple clang
+else
+override OMPFLAGS = -fopenmp
+endif
Expand Down Expand Up @@ -166,24 +176,24 @@ index 348c283be..74db44d84 100644
+madevent_fortran_link: $(PROG)_fortran
+ rm -f $(PROG)
+ ln -s $(PROG)_fortran $(PROG)
+

-$(LIBDIR)libpdf.$(libext):
- cd ../../Source/PDF; make
+madevent_cpp_link: $(CUDACPP_BUILDDIR)/$(PROG)_cpp
+ rm -f $(PROG)
+ ln -s $(CUDACPP_BUILDDIR)/$(PROG)_cpp $(PROG)
+

-$(LIBDIR)libgammaUPC.$(libext):
- cd ../../Source/PDF/gammaUPC; make
+madevent_cuda_link: $(CUDACPP_BUILDDIR)/$(PROG)_cuda
+ rm -f $(PROG)
+ ln -s $(CUDACPP_BUILDDIR)/$(PROG)_cuda $(PROG)

-$(LIBDIR)libpdf.$(libext):
- cd ../../Source/PDF; make
+
+# Building $(PROG)_cpp also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (improved patch for cpp-only builds #503)
+$(CUDACPP_BUILDDIR)/$(PROG)_cpp: $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(LIBS) $(MATRIX) counters.o ompnumthreads.o $(CUDACPP_BUILDDIR)/.cudacpplibs
+ $(FC) -o $(CUDACPP_BUILDDIR)/$(PROG)_cpp $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(MATRIX) $(LINKLIBS) $(BIASDEPENDENCIES) $(OMPFLAGS) counters.o ompnumthreads.o -L$(LIBDIR)/$(CUDACPP_BUILDDIR) -l$(CUDACPP_COMMONLIB) -l$(CUDACPP_CXXLIB) $(LIBFLAGSRPATH) $(LDFLAGS)
+ if [ -f $(LIBDIR)/$(CUDACPP_BUILDDIR)/lib$(CUDACPP_CULIB).* ]; then $(FC) -o $(CUDACPP_BUILDDIR)/$(PROG)_cuda $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(MATRIX) $(LINKLIBS) $(BIASDEPENDENCIES) $(OMPFLAGS) counters.o ompnumthreads.o -L$(LIBDIR)/$(CUDACPP_BUILDDIR) -l$(CUDACPP_COMMONLIB) -l$(CUDACPP_CULIB) $(LIBFLAGSRPATH) $(LDFLAGS); fi

-$(LIBDIR)libgammaUPC.$(libext):
- cd ../../Source/PDF/gammaUPC; make
+
+$(CUDACPP_BUILDDIR)/$(PROG)_cuda: $(CUDACPP_BUILDDIR)/$(PROG)_cpp
+
+counters.o: counters.cc timer.h
Expand Down Expand Up @@ -222,7 +232,7 @@ index 348c283be..74db44d84 100644

# Dependencies

@@ -97,5 +203,61 @@ unwgt.o: genps.inc nexternal.inc symswap.inc cluster.inc run.inc message.inc \
@@ -97,5 +214,61 @@ unwgt.o: genps.inc nexternal.inc symswap.inc cluster.inc run.inc message.inc \
run_config.inc
initcluster.o: message.inc

Expand Down Expand Up @@ -287,10 +297,10 @@ index 348c283be..74db44d84 100644
+distclean: cleanall # Clean all fortran and cudacpp builds as well as the googletest installation
+ $(MAKE) -f $(CUDACPP_MAKEFILE) distclean
diff --git b/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py a/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py
index 4dd71db86..3b8ec3121 100755
index ebbc1ac1d..a88d60b28 100755
--- b/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py
+++ a/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py
@@ -380,8 +380,20 @@ class gensym(object):
@@ -385,8 +385,20 @@ class gensym(object):
done = True
if not done:
raise Exception('Parsing error in gensym: %s' % stdout)
Expand All @@ -314,7 +324,7 @@ index 4dd71db86..3b8ec3121 100755
self.submit_to_cluster(job_list)
job_list = {}
diff --git b/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py a/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py
index a056d3861..b70b548e5 100755
index 389b93ab8..d72270289 100755
--- b/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py
+++ a/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py
@@ -3614,8 +3614,20 @@ Beware that this can be dangerous for local multicore runs.""")
Expand Down
32 changes: 13 additions & 19 deletions epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Copyright (C) 2020-2023 CERN and UCLouvain.
# Licensed under the GNU Lesser General Public License (version 3 or later).
# Created by: O. Mattelaer (Aug 2023) for the MG5aMC CUDACPP plugin.
# Further modified by: O. Mattelaer, A. Valassi (2023) for the MG5aMC CUDACPP plugin.

import logging
import os
Expand All @@ -19,22 +23,15 @@
import madgraph.various.banner as banner_mod

class CPPMEInterface(madevent_interface.MadEventCmdShell):

def compile(self, *args, **opts):
""" """

import multiprocessing
if not self.options['nb_core'] or self.options['nb_core'] == 'None':
self.options['nb_core'] = multiprocessing.cpu_count()

if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'):
import pathlib
import os
pjoin = os.path.join




cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py
logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend)
if cudacpp_backend == 'FORTRAN':
Expand All @@ -50,15 +47,14 @@ def compile(self, *args, **opts):
return misc.compile(nb_core=self.options['nb_core'], *args, **opts)

class CPPRunCard(banner_mod.RunCardLO):

def reset_simd(self, old_value, new_value, name):
    """Run ``make cleanavx`` in the ``Source`` directory unless no recompilation is needed.

    The ``Source`` directory is taken to be ``<grandparent of self.path>/Source``.
    NOTE(review): presumably registered as a run-card parameter-change
    callback - confirm against the caller.

    Args:
        old_value: previous value of the parameter (converted with ``int``
            when ``name`` is ``"vector_size"``).
        new_value: new value of the parameter.
        name: name of the parameter being changed.

    Returns:
        None.
    """
    if not hasattr(self, 'path'):
        # Without ``self.path`` the Source directory cannot be located: warn and
        # skip the cleanup rather than abort with a message-less exception.
        logger.warning('WARNING! CPPRunCard instance has no attribute path')
        return
        ###raise Exception('INTERNAL ERROR! CPPRunCard instance has no attribute path')
    if name == "vector_size" and new_value <= int(old_value):
        # code can handle the new size -> do not recompile
        return
    Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source')
    # Best-effort cleanup: make's output and exit status are deliberately discarded
    subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

Expand All @@ -68,33 +64,31 @@ def plugin_input(self, finput):
def default_setup(self):
    """Run the parent default setup, then add the ``cudacpp_backend`` parameter."""
    super().default_setup()
    # Plugin-specific parameter, added here with value 'CPP'
    self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False)


def write_one_include_file(self, output_dir, incname, output_file=None):
    """Write one include file through the parent class, with one exception.

    ``vector.inc`` is left untouched when ``'vector_size'`` is absent from
    ``self.user_set``; every other request is forwarded unchanged.

    Args:
        output_dir: forwarded to the parent implementation.
        incname: name of the include file to write.
        output_file: forwarded to the parent implementation.

    Returns:
        None (the parent's return value is not propagated).
    """
    leave_vector_inc_alone = incname == "vector.inc" and 'vector_size' not in self.user_set
    if not leave_vector_inc_alone:
        super().write_one_include_file(output_dir, incname, output_file)


def check_validity(self):
    """Run the parent validity checks, then enforce the plugin's settings.

    After the inherited checks, ``sde_strategy`` is reset to 1 (with a
    warning) whenever ``SDE_strategy`` reads as anything else, and a truthy
    ``hel_recycling`` is switched off.
    """
    super().check_validity()

    current_strategy = self['SDE_strategy']
    if current_strategy != 1:
        logger.warning('SDE_strategy different of 1 is not supported with SMD/GPU mode')
        self['sde_strategy'] = 1

    recycling_enabled = self['hel_recycling']
    if recycling_enabled:
        self['hel_recycling'] = False

class GPURunCard(CPPRunCard):
    """Run card variant that adds ``cudacpp_backend`` with value 'CUDA'."""

    def default_setup(self):
        # super(CPPRunCard, self) starts the method lookup *after* CPPRunCard in the
        # MRO, so CPPRunCard.default_setup (which adds 'cudacpp_backend' as 'CPP')
        # is bypassed and the parameter is added only once, below.
        super(CPPRunCard, self).default_setup()
        self.add_param('cudacpp_backend', 'CUDA', include=False, hidden=False)


#class CUDACPPRunCard(CPPRunCard):
# def default_setup(self):
# super(CPPRunCard, self).default_setup()
# self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False)

# Module-level aliases for the interface and run-card classes defined in this file.
# NOTE(review): presumably looked up by name by the code that loads this plugin - confirm.
MEINTERFACE = CPPMEInterface
RunCard = CPPRunCard
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ namespace mg5amcCpu
bool known = true;
bool ok = __builtin_cpu_supports( "sse4.2" );
const std::string tag = "nehalem (SSE4.2)";
#else
#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted
bool known = false; // __builtin_cpu_supports is not supported
// See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html
// See https://stackoverflow.com/q/62783908
Expand Down
Loading

0 comments on commit 63ddd4d

Please sign in to comment.