Skip to content

Commit

Permalink
Merge branch 'helas' into ggtt4g
Browse files Browse the repository at this point in the history
Fix conflicts:
	epochX/cudacpp/tput/teeThroughputX.sh
	epochX/cudacpp/tput/throughputX.sh
  • Loading branch information
valassi committed Aug 29, 2024
2 parents b498209 + 718a84e commit 1bef1ed
Show file tree
Hide file tree
Showing 323 changed files with 20,669 additions and 7,091 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

#include "MemoryAccessHelpers.h"
#include "MemoryAccessVectors.h"
#include "MemoryBuffers.h" // for HostBufferMatrixElements::isaligned
#include "MemoryBuffers.h" // for HostBufferGs::isaligned

// NB: namespaces mg5amcGpu and mg5amcCpu include types which are defined in different ways for CPU and GPU builds (see #318 and #725)
#ifdef MGONGPUCPP_GPUIMPL
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

#include "mgOnGpuConfig.h"

#include "CPPProcess.h"
#include "CPPProcess.h" // for CPPProcess::np4 and CPPProcess::npar (NB: npar may differ in different P* subprocess directories!)
#include "MemoryAccessHelpers.h"
#include "MemoryAccessVectors.h"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -958,6 +958,8 @@ main( int argc, char** argv )
<< " [" << process.getCompiler() << "]"
#ifdef MGONGPU_INLINE_HELAMPS
<< " [inlineHel=1]"
#elif defined MGONGPU_LINKER_HELAMPS
<< " [inlineHel=L]"
#else
<< " [inlineHel=0]"
#endif
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright (C) 2020-2024 CERN and UCLouvain.
// Licensed under the GNU Lesser General Public License (version 3 or later).
// Created by: A. Valassi (Aug 2024) for the MG5aMC CUDACPP plugin.
// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin.

#ifdef MGONGPU_LINKER_HELAMPS

#include "HelAmps_sm.h"

// -----------------------------------------------------------------------------
// *** NB: this implementation class depends on MemoryAccessMomenta,
// *** where the AOSOA definition depends on CPPProcess::npar,
// *** which may be different in different P* subprocess directories:
// *** therefore this class is presently hosted and compiled in each P*
// -----------------------------------------------------------------------------

#include "MemoryAccessAmplitudes.h"
#include "MemoryAccessCouplings.h"
#include "MemoryAccessCouplingsFixed.h"
#include "MemoryAccessGs.h"
#include "MemoryAccessMatrixElements.h"
#include "MemoryAccessMomenta.h"
#include "MemoryAccessWavefunctions.h"

#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
#include "MemoryAccessDenominators.h"
#include "MemoryAccessNumerators.h"
#endif

// Open the build-specific namespace: mg5amcGpu if MGONGPUCPP_GPUIMPL is defined, mg5amcCpu otherwise
#ifdef MGONGPUCPP_GPUIMPL
namespace mg5amcGpu
#else
namespace mg5amcCpu
#endif
{
//--------------------------------------------------------------------------

// Short aliases for the memory-access helper classes used inside this namespace:
// each alias is bound to the Device* class if MGONGPUCPP_GPUIMPL is defined and to the Host* class otherwise
// (the NUM_ACCESS and DEN_ACCESS aliases are declared only if MGONGPU_SUPPORTS_MULTICHANNEL is defined)
#ifdef MGONGPUCPP_GPUIMPL
using M_ACCESS = DeviceAccessMomenta; // non-trivial access: buffer includes all events
using E_ACCESS = DeviceAccessMatrixElements; // non-trivial access: buffer includes all events
using W_ACCESS = DeviceAccessWavefunctions; // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
using A_ACCESS = DeviceAccessAmplitudes; // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
using CD_ACCESS = DeviceAccessCouplings; // non-trivial access (dependent couplings): buffer includes all events
using CI_ACCESS = DeviceAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
using NUM_ACCESS = DeviceAccessNumerators; // non-trivial access: buffer includes all events
using DEN_ACCESS = DeviceAccessDenominators; // non-trivial access: buffer includes all events
#endif
#else
// NOTE(review): this using-directive names the namespace that is already open in this branch (mg5amcCpu),
// so it looks redundant - confirm before removing it
using namespace ::mg5amcCpu;
using M_ACCESS = HostAccessMomenta; // non-trivial access: buffer includes all events
using E_ACCESS = HostAccessMatrixElements; // non-trivial access: buffer includes all events
using W_ACCESS = HostAccessWavefunctions; // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
using A_ACCESS = HostAccessAmplitudes; // TRIVIAL ACCESS (no kernel splitting yet): buffer for one event
using CD_ACCESS = HostAccessCouplings; // non-trivial access (dependent couplings): buffer includes all events
using CI_ACCESS = HostAccessCouplingsFixed; // TRIVIAL access (independent couplings): buffer for one event
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
using NUM_ACCESS = HostAccessNumerators; // non-trivial access: buffer includes all events
using DEN_ACCESS = HostAccessDenominators; // non-trivial access: buffer includes all events
#endif
#endif

//--------------------------------------------------------------------------
// NOTE(review): the next line starts with a template placeholder (named function_definitions2), presumably
// expanded by the code generator into the function definitions - confirm; the trailing brace closes the namespace
%(function_definitions2)s}
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -551,8 +551,11 @@ $(info HELINL='$(HELINL)')
ifeq ($(HELINL),1)
CXXFLAGS += -DMGONGPU_INLINE_HELAMPS
GPUFLAGS += -DMGONGPU_INLINE_HELAMPS
else ifeq ($(HELINL),L)
CXXFLAGS += -DMGONGPU_LINKER_HELAMPS
GPUFLAGS += -DMGONGPU_LINKER_HELAMPS
else ifneq ($(HELINL),0)
$(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported)
$(error Unknown HELINL='$(HELINL)': only 'L', '0' and '1' are supported)
endif

# Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1")
Expand Down Expand Up @@ -647,7 +650,6 @@ override RUNTIME =
#=== Makefile TARGETS and build rules below
#===============================================================================


ifeq ($(GPUCC),)
cxx_checkmain=$(BUILDDIR)/check_cpp.exe
cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe
Expand Down Expand Up @@ -776,6 +778,14 @@ gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementK
gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o
endif

# Add object files and special build flags only for the HELINL=L mode
ifeq ($(HELINL),L)
cxx_objects_lib+=$(BUILDDIR)/HelAmps_cpp.o
gpu_objects_lib+=$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o
$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # compilation fails if this is not added (ptxas fatal: Unresolved extern function)
$(BUILDDIR)/HelAmps_$(GPUSUFFIX).o: GPUFLAGS += -rdc true # runtime fails if this is not added ('invalid device symbol' in CPPProcess.cc cHel to tHel copy)
endif

# Target (and build rules): C++ and CUDA/HIP shared libraries
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o
Expand All @@ -786,12 +796,12 @@ ifneq ($(GPUCC),)
$(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
$(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o
$(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
$(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
# Bypass std::filesystem completely to ease portability on LUMI #803
#ifneq ($(findstring hipcc,$(GPUCC)),)
# $(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
# $(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
#else
# $(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
# $(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPUARCHFLAGS) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
#endif
endif

Expand Down Expand Up @@ -962,6 +972,7 @@ $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY
$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
$(gpu_testmain): LIBFLAGS += $(GPUARCHFLAGS) # avoid "nvlink warning: SM Arch not found" when using rdc
###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ ifneq ($(words $(filter $(FPTYPE), $(SUPPORTED_FPTYPES))),1)
$(error Invalid fptype FPTYPE='$(FPTYPE)': supported fptypes are $(foreach fptype,$(SUPPORTED_FPTYPES),'$(fptype)'))
endif

override SUPPORTED_HELINLS = 0 1
override SUPPORTED_HELINLS = L 0 1
ifneq ($(words $(filter $(HELINL), $(SUPPORTED_HELINLS))),1)
$(error Invalid helinl HELINL='$(HELINL)': supported helinls are $(foreach helinl,$(SUPPORTED_HELINLS),'$(helinl)'))
endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

// Choose if curand is supported for generating random numbers
// For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead)
// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
// For both CUDA and C++, by default, do not skip curand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND
// (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785)
#if defined __HIPCC__
#define MGONGPU_HAS_NO_CURAND 1
Expand All @@ -45,7 +45,7 @@

// Choose if hiprand is supported for generating random numbers
// For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead)
// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
// For both HIP and C++, by default, do not skip hiprand, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND
// (there may exist HIP installations which do not include hiprand?)
#if defined __CUDACC__
#define MGONGPU_HAS_NO_HIPRAND 1
Expand Down Expand Up @@ -78,9 +78,16 @@
// Choose whether to inline all HelAmps functions
// This optimization can gain almost a factor 4 in C++, similar to -flto (issue #229)
// By default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_INLINE_HELAMPS
// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
//#undef MGONGPU_INLINE_HELAMPS // default
////#define MGONGPU_INLINE_HELAMPS 1

// Choose whether to compile and link all HelAmps functions as separate object files
// By default, do not link, but allow this macro to be set from outside with e.g. -DMGONGPU_LINKER_HELAMPS
// (NB: MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive)
//#undef MGONGPU_LINKER_HELAMPS // default
////#define MGONGPU_LINKER_HELAMPS 1

// Choose whether to hardcode the cIPD physics parameters rather than reading them from user cards
// This optimization can gain 20%% in CUDA in eemumu (issue #39)
// By default, do not hardcode, but allow this macro to be set from outside with e.g. -DMGONGPU_HARDCODE_PARAM
Expand Down Expand Up @@ -147,6 +154,11 @@
#endif
#endif

// SANITY CHECKS (HelAmps)
// MGONGPU_INLINE_HELAMPS and MGONGPU_LINKER_HELAMPS are mutually exclusive:
// stop the build with an explicit message if both macros are defined (defining neither, or only one, is accepted)
#if defined MGONGPU_INLINE_HELAMPS and defined MGONGPU_LINKER_HELAMPS
#error You must CHOOSE (AT MOST) ONLY ONE of MGONGPU_INLINE_HELAMPS or MGONGPU_LINKER_HELAMPS
#endif

// NB: namespace mgOnGpu includes types which are defined in exactly the same way for CPU and GPU builds (see #318 and #725)
namespace mgOnGpu
{
Expand Down
Loading

0 comments on commit 1bef1ed

Please sign in to comment.