Skip to content

Commit

Permalink
Merge pull request #1006 from valassi/clang
Browse files Browse the repository at this point in the history
Fixes for clang16, gcc14.2, HIP/AMD
  • Loading branch information
valassi committed Sep 19, 2024
2 parents a6d55f6 + 74608a4 commit fe331ed
Show file tree
Hide file tree
Showing 253 changed files with 12,192 additions and 11,903 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,14 @@ namespace mg5amcCpu
, sqsWGdiff( 0 )
, tag( "" ) {}
// Combine two EventStatistics
EventStatistics& operator+=( const EventStatistics& stats )
#ifdef __clang__
// Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__)
// Disable optimizations for this function in all clang builds, not only HIP (work around FPE crash #1005: now using #ifdef __clang__)
// See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization
__attribute__( ( optnone ) )
#endif
EventStatistics&
operator+=( const EventStatistics& stats )
{
EventStatistics s1 = *this; // temporary copy
EventStatistics s2 = stats; // temporary copy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda)
GPULANGUAGE = cu
GPUSUFFIX = cuda

# Basic compiler flags (optimization and includes)
# Optimization flags
GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt))

# NVidia CUDA architecture flags
Expand Down Expand Up @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip)
GPULANGUAGE = hip
GPUSUFFIX = hip

# Basic compiler flags (optimization and includes)
# Optimization flags
GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt))

# DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland)
###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY

# AMD HIP architecture flags
GPUARCHFLAGS = --offload-arch=gfx90a
GPUFLAGS += $(GPUARCHFLAGS)
Expand Down Expand Up @@ -874,7 +877,7 @@ endif
$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe)
ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802
$(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64
$(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64
else
$(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe)
endif
Expand Down Expand Up @@ -975,7 +978,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both
$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802
$(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64
$(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64
else
$(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda
endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -704,28 +704,29 @@ namespace mg5amcGpu
namespace mg5amcCpu
#endif
{
// The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[]
// The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[]
// It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined
// It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype
class cxtype_ref
{
public:
cxtype_ref() = delete;
cxtype_ref( const cxtype_ref& ) = delete;
cxtype_ref( cxtype_ref&& ) = default; // copy refs
cxtype_ref( cxtype_ref&& ) = default; // copy const refs
__host__ __device__ cxtype_ref( fptype& r, fptype& i )
: m_preal( &r ), m_pimag( &i ) {} // copy refs
: m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs
cxtype_ref& operator=( const cxtype_ref& ) = delete;
//__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary
__host__ __device__ cxtype_ref& operator=( const cxtype& c )
{
*m_preal = cxreal( c );
*m_pimag = cximag( c );
return *this;
} // copy values
} // copy (assign) non-const values
__host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); }
private:
fptype *m_preal, *m_pimag; // RI
fptype* const m_preal; // const pointer to non-const fptype R
fptype* const m_pimag; // const pointer to non-const fptype I
};

// Printout to stream for user defined types
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,9 @@ namespace mg5amcCpu
#ifdef MGONGPU_HAS_CPPCXTYPEV_BRK
// NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED
// NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[]
// NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! **
cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); }
//cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004
cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); }
cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); }
#endif
const fptype_v& real() const
{
Expand Down
16 changes: 8 additions & 8 deletions epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ generate e+ e- > mu+ mu-
No model currently active, so we import the Standard Model
INFO: load particles
INFO: load vertices
DEBUG: model prefixing takes 0.005808353424072266 
DEBUG: model prefixing takes 0.005692958831787109 
INFO: Restrict model sm with file models/sm/restrict_default.dat .
DEBUG: Simplifying conditional expressions 
DEBUG: remove interactions: u s w+ at order: QED=1 
Expand Down Expand Up @@ -182,19 +182,19 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum
DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1547] 
DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1548] 
Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s
Wrote files for 8 helas calls in 0.075 s
Wrote files for 8 helas calls in 0.072 s
DEBUG: self.vector_size =  32 [export_v4.py at line 7023] 
ALOHA: aloha starts to compute helicity amplitudes
ALOHA: aloha creates FFV1 routines
ALOHA: aloha creates FFV2 routines
ALOHA: aloha creates FFV4 routines
ALOHA: aloha creates 3 routines in 0.211 s
ALOHA: aloha creates 3 routines in 0.205 s
ALOHA: aloha starts to compute helicity amplitudes
ALOHA: aloha creates FFV1 routines
ALOHA: aloha creates FFV2 routines
ALOHA: aloha creates FFV4 routines
ALOHA: aloha creates FFV2_4 routines
ALOHA: aloha creates 7 routines in 0.262 s
ALOHA: aloha creates 7 routines in 0.260 s
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
<class 'aloha.create_aloha.AbstractRoutine'> FFV2
Expand Down Expand Up @@ -234,10 +234,10 @@ Type "launch" to generate events from this process, or see
Run "open index.html" to see more information about this process.
quit

real 0m2.190s
user 0m1.811s
sys 0m0.293s
Code generation completed in 2 seconds
real 0m3.845s
user 0m1.829s
sys 0m0.251s
Code generation completed in 4 seconds
************************************************************
* *
* W E L C O M E to *
Expand Down
9 changes: 8 additions & 1 deletion epochX/cudacpp/ee_mumu.mad/SubProcesses/EventStatistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,14 @@ namespace mg5amcCpu
, sqsWGdiff( 0 )
, tag( "" ) {}
// Combine two EventStatistics
EventStatistics& operator+=( const EventStatistics& stats )
#ifdef __clang__
// Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__)
// Disable optimizations for this function in all clang builds, not only HIP (work around FPE crash #1005: now using #ifdef __clang__)
// See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization
__attribute__( ( optnone ) )
#endif
EventStatistics&
operator+=( const EventStatistics& stats )
{
EventStatistics s1 = *this; // temporary copy
EventStatistics s2 = stats; // temporary copy
Expand Down
11 changes: 7 additions & 4 deletions epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda)
GPULANGUAGE = cu
GPUSUFFIX = cuda

# Basic compiler flags (optimization and includes)
# Optimization flags
GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt))

# NVidia CUDA architecture flags
Expand Down Expand Up @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip)
GPULANGUAGE = hip
GPUSUFFIX = hip

# Basic compiler flags (optimization and includes)
# Optimization flags
GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt))

# DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland)
###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY

# AMD HIP architecture flags
GPUARCHFLAGS = --offload-arch=gfx90a
GPUFLAGS += $(GPUARCHFLAGS)
Expand Down Expand Up @@ -874,7 +877,7 @@ endif
$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe)
ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802
$(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64
$(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64
else
$(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe)
endif
Expand Down Expand Up @@ -975,7 +978,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both
$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802
$(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64
$(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64
else
$(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda
endif
Expand Down
11 changes: 6 additions & 5 deletions epochX/cudacpp/ee_mumu.mad/src/mgOnGpuCxtypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -704,28 +704,29 @@ namespace mg5amcGpu
namespace mg5amcCpu
#endif
{
// The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[]
// The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[]
// It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined
// It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype
class cxtype_ref
{
public:
cxtype_ref() = delete;
cxtype_ref( const cxtype_ref& ) = delete;
cxtype_ref( cxtype_ref&& ) = default; // copy refs
cxtype_ref( cxtype_ref&& ) = default; // copy const refs
__host__ __device__ cxtype_ref( fptype& r, fptype& i )
: m_preal( &r ), m_pimag( &i ) {} // copy refs
: m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs
cxtype_ref& operator=( const cxtype_ref& ) = delete;
//__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary
__host__ __device__ cxtype_ref& operator=( const cxtype& c )
{
*m_preal = cxreal( c );
*m_pimag = cximag( c );
return *this;
} // copy values
} // copy (assign) non-const values
__host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); }
private:
fptype *m_preal, *m_pimag; // RI
fptype* const m_preal; // const pointer to non-const fptype R
fptype* const m_pimag; // const pointer to non-const fptype I
};

// Printout to stream for user defined types
Expand Down
5 changes: 3 additions & 2 deletions epochX/cudacpp/ee_mumu.mad/src/mgOnGpuVectors.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,9 @@ namespace mg5amcCpu
#ifdef MGONGPU_HAS_CPPCXTYPEV_BRK
// NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED
// NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[]
// NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! **
cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); }
//cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004
cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); }
cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); }
#endif
const fptype_v& real() const
{
Expand Down
12 changes: 6 additions & 6 deletions epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ generate e+ e- > mu+ mu-
No model currently active, so we import the Standard Model
INFO: load particles
INFO: load vertices
DEBUG: model prefixing takes 0.00577545166015625 
DEBUG: model prefixing takes 0.005699634552001953 
INFO: Restrict model sm with file models/sm/restrict_default.dat .
DEBUG: Simplifying conditional expressions 
DEBUG: remove interactions: u s w+ at order: QED=1 
Expand Down Expand Up @@ -149,7 +149,7 @@ INFO: Checking for minimal orders which gives processes.
INFO: Please specify coupling orders to bypass this step.
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1
INFO: Process has 2 diagrams
1 processes with 2 diagrams generated in 0.005 s
1 processes with 2 diagrams generated in 0.004 s
Total: 1 processes with 2 diagrams
output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu
Load PLUGIN.CUDACPP_OUTPUT
Expand Down Expand Up @@ -177,7 +177,7 @@ ALOHA: aloha creates FFV1 routines
ALOHA: aloha creates FFV2 routines
ALOHA: aloha creates FFV4 routines
ALOHA: aloha creates FFV2_4 routines
ALOHA: aloha creates 4 routines in 0.274 s
ALOHA: aloha creates 4 routines in 0.276 s
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
<class 'aloha.create_aloha.AbstractRoutine'> FFV2
Expand All @@ -196,7 +196,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory
INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/.
quit

real 0m0.708s
user 0m0.606s
sys 0m0.059s
real 0m0.775s
user 0m0.619s
sys 0m0.043s
Code generation completed in 1 seconds
9 changes: 8 additions & 1 deletion epochX/cudacpp/ee_mumu.sa/SubProcesses/EventStatistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,14 @@ namespace mg5amcCpu
, sqsWGdiff( 0 )
, tag( "" ) {}
// Combine two EventStatistics
EventStatistics& operator+=( const EventStatistics& stats )
#ifdef __clang__
// Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__)
// Disable optimizations for this function in all clang builds, not only HIP (work around FPE crash #1005: now using #ifdef __clang__)
// See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization
__attribute__( ( optnone ) )
#endif
EventStatistics&
operator+=( const EventStatistics& stats )
{
EventStatistics s1 = *this; // temporary copy
EventStatistics s2 = stats; // temporary copy
Expand Down
11 changes: 7 additions & 4 deletions epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda)
GPULANGUAGE = cu
GPUSUFFIX = cuda

# Basic compiler flags (optimization and includes)
# Optimization flags
GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt))

# NVidia CUDA architecture flags
Expand Down Expand Up @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip)
GPULANGUAGE = hip
GPUSUFFIX = hip

# Basic compiler flags (optimization and includes)
# Optimization flags
GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt))

# DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland)
###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY

# AMD HIP architecture flags
GPUARCHFLAGS = --offload-arch=gfx90a
GPUFLAGS += $(GPUARCHFLAGS)
Expand Down Expand Up @@ -874,7 +877,7 @@ endif
$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe)
ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802
$(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64
$(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64
else
$(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe)
endif
Expand Down Expand Up @@ -975,7 +978,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both
$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802
$(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64
$(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64
else
$(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda
endif
Expand Down
Loading

0 comments on commit fe331ed

Please sign in to comment.