Skip to content

Commit

Permalink
[helas] in gg_tt.mad cudacpp.mk, temporarily go back and try to use h…
Browse files Browse the repository at this point in the history
…ipcc instead of gfortran to link fcheck_hip.exe: this links but it fails at runtime, will revert

Also add -ggdb for debugging. At runtime this fails with the usual madgraph5#802.
It is now clear that the failure is in gpuMemcpyToSymbol (CPPProcess.cc line 558),
and the error is precisely 'shared object initialization failed'.

./fcheck_hip.exe 1 32 1
...
WARNING! Instantiate device Bridge (nevt=32, gpublocks=1, gputhreads=32, gpublocks*gputhreads=32)
ERROR! assertGpu: 'shared object initialization failed' (303) in CPPProcess.cc:558
fcheck_hip.exe: ./GpuRuntime.h:26: void assertGpu(hipError_t, const char *, int, bool): Assertion `code == gpuSuccess' failed.

Program received signal SIGABRT: Process abort signal.
Backtrace for this error:
0  0x14f947bff2e2 in ???
1  0x14f947bfe475 in ???
2  0x14f945f33dbf in ???
3  0x14f945f33d2b in ???
4  0x14f945f353e4 in ???
5  0x14f945f2bc69 in ???
6  0x14f945f2bcf1 in ???
7  0x14f947bcef96 in _Z9assertGpu10hipError_tPKcib
        at ./GpuRuntime.h:26
8  0x14f947bcef96 in _ZN9mg5amcGpu10CPPProcessC2Ebb
        at /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc:558
9  0x14f947bd2cf3 in _ZN9mg5amcGpu6BridgeIdEC2Ejjj
        at ./Bridge.h:268
10  0x14f947bd678e in fbridgecreate_
        at /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fbridge.cc:54
11  0x2168fd in ???
12  0x216bfe in ???
13  0x14f945f1e24c in ???
14  0x216249 in _start
        at ../sysdeps/x86_64/start.S:120
15  0xffffffffffffffff in ???
Aborted
  • Loading branch information
valassi committed Sep 20, 2024
1 parent 60fe59b commit 988419b
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ else ifeq ($(BACKEND),hip)
GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt))

# DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland)
###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY
GPUFLAGS += -ggdb # FOR DEBUGGING ONLY

# AMD HIP architecture flags
GPUARCHFLAGS = --offload-arch=gfx90a
Expand Down Expand Up @@ -893,7 +893,8 @@ endif
# Link rule for the executable $(gpu_fcheckmain).
#
# Inputs (prerequisites): the fcheck_sa_fortran.o and fsampler_$(GPUSUFFIX).o
# objects in $(BUILDDIR), the shared library lib$(MG5AMC_GPULIB).so in $(LIBDIR),
# and the extra objects listed in $(gpu_objects_exe).
#
# The target-specific "LIBFLAGS += $(GPULIBFLAGSRPATH)" applies to this target only.
#
# The linker driver is chosen at parse time from the value of $(GPUCC):
#  - if $(GPUCC) contains the substring "hipcc", the hipcc branch is used (see #802);
#  - otherwise the objects are linked directly with $(GPUCC), adding only -lgfortran
#    and the MG5AMC GPU library.
#
# In the hipcc branch, the $(FC) variant derives an extra -L directory from the
# location reported by "$(GPUCC) -print-prog-name=clang" (two directories up, then
# /lib) and links -lamdhip64 and -lstdc++ explicitly. The $(GPUCC) variant instead
# passes "-fgpu-rdc --hip-link".
#
# NOTE(review): the hipcc branch below shows an active $(FC) line, a commented-out
# copy of that same line marked "fails to link", and a $(GPUCC) line marked "links
# but crashes". This looks like a rendered diff in which the first $(FC) line is the
# removed (pre-commit) version and only the $(GPUCC) line is live - TODO confirm
# against the real cudacpp.mk. If that is right, the "using $FC" wording on the
# ifneq line no longer matches the live command.
# NOTE(review): recipe lines appear here without their leading TAB (presumably lost
# in extraction); a real makefile requires a TAB before each command line.
$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe)
ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802
$(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64
# $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 # fails to link
$(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -fgpu-rdc --hip-link # links but crashes
else
$(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe)
endif
Expand Down

0 comments on commit 988419b

Please sign in to comment.