Skip to content

Commit

Permalink
[compiler-rt] Initial support for builtins on GPU targets (#95304)
Browse files Browse the repository at this point in the history
Summary:
This patch adds initial support to build the `builtins` library for GPU
targets. Primarily this requires adding a few new architectures for
`amdgcn` and `nvptx64`. I built this using the following invocations.

```console
$ cmake ../compiler-rt -DCMAKE_C_COMPILER=clang
  -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -GNinja
  -DCMAKE_C_COMPILER_TARGET=<nvptx64-nvidia-cuda|amdgcn-amd-amdhsa>
  -DCMAKE_CXX_COMPILER_TARGET=<nvptx64-nvidia-cuda|amdgcn-amd-amdhsa>
  -DCMAKE_C_COMPILER_WORKS=1 -DCMAKE_CXX_COMPILER_WORKS=1
  -DLLVM_CMAKE_DIR=../cmake/Modules -DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON
  -C ../compiler-rt/cmake/caches/GPU.cmake
```

Some pointers would be appreciated for how to test this using a standard
(non-default target only) build.

GPU builds are somewhat finnicky. We only expect this to be built with a
sufficiently new clang, as it's the only compiler that supports the
target and output we distribute. Distribution is done as LLVM-IR blobs
for now.
GPUs have little backward compatibility, so linking object files is
left to a future patch.

More work is necessary to build correctly for all targets and ship into
the correct clang resource directory. Additionally we need to use the
`libc` project's support for running unit tests.
  • Loading branch information
jhuber6 authored Jul 10, 2024
1 parent e635260 commit dad7442
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 8 deletions.
5 changes: 3 additions & 2 deletions compiler-rt/cmake/Modules/AddCompilerRT.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,9 @@ function(add_compiler_rt_runtime name type)
${ARGN})
set(libnames)
# Until we support this some other way, build compiler-rt runtime without LTO
# to allow non-LTO projects to link with it.
if(COMPILER_RT_HAS_FNO_LTO_FLAG)
# to allow non-LTO projects to link with it. GPU targets can currently only be
# distributed as LLVM-IR and ignore this.
if(COMPILER_RT_HAS_FNO_LTO_FLAG AND NOT COMPILER_RT_GPU_BUILD)
set(NO_LTO_FLAGS "-fno-lto")
else()
set(NO_LTO_FLAGS "")
Expand Down
2 changes: 1 addition & 1 deletion compiler-rt/cmake/Modules/BuiltinTests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ endfunction()
function(builtin_check_c_compiler_flag flag output)
if(NOT DEFINED ${output})
message(STATUS "Performing Test ${output}")
try_compile_only(result FLAGS ${flag})
try_compile_only(result FLAGS ${flag} ${CMAKE_REQUIRED_FLAGS})
set(${output} ${result} CACHE INTERNAL "Compiler supports ${flag}")
if(${result})
message(STATUS "Performing Test ${output} - Success")
Expand Down
27 changes: 26 additions & 1 deletion compiler-rt/cmake/Modules/CompilerRTUtils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ macro(test_target_arch arch def)
endmacro()

macro(detect_target_arch)
check_symbol_exists(__AMDGPU__ "" __AMDGPU)
check_symbol_exists(__arm__ "" __ARM)
check_symbol_exists(__AVR__ "" __AVR)
check_symbol_exists(__aarch64__ "" __AARCH64)
Expand All @@ -154,6 +155,7 @@ macro(detect_target_arch)
check_symbol_exists(__loongarch__ "" __LOONGARCH)
check_symbol_exists(__mips__ "" __MIPS)
check_symbol_exists(__mips64__ "" __MIPS64)
check_symbol_exists(__NVPTX__ "" __NVPTX)
check_symbol_exists(__powerpc__ "" __PPC)
check_symbol_exists(__powerpc64__ "" __PPC64)
check_symbol_exists(__powerpc64le__ "" __PPC64LE)
Expand All @@ -164,7 +166,9 @@ macro(detect_target_arch)
check_symbol_exists(__wasm32__ "" __WEBASSEMBLY32)
check_symbol_exists(__wasm64__ "" __WEBASSEMBLY64)
check_symbol_exists(__ve__ "" __VE)
if(__ARM)
if(__AMDGPU)
add_default_target_arch(amdgcn)
elseif(__ARM)
add_default_target_arch(arm)
elseif(__AVR)
add_default_target_arch(avr)
Expand Down Expand Up @@ -192,6 +196,8 @@ macro(detect_target_arch)
add_default_target_arch(mips64)
elseif(__MIPS)
add_default_target_arch(mips)
elseif(__NVPTX)
add_default_target_arch(nvptx64)
elseif(__PPC64) # must be checked before __PPC
add_default_target_arch(powerpc64)
elseif(__PPC64LE)
Expand Down Expand Up @@ -397,6 +403,21 @@ macro(construct_compiler_rt_default_triple)
set(COMPILER_RT_DEFAULT_TARGET_ARCH "i386")
endif()

# If we are directly targeting a GPU we need to check that the compiler is
# compatible and pass some default arguments.
if(COMPILER_RT_DEFAULT_TARGET_ONLY)

# Pass the necessary flags to make flag detection work.
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
set(COMPILER_RT_GPU_BUILD ON)
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nogpulib")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
set(COMPILER_RT_GPU_BUILD ON)
set(CMAKE_REQUIRED_FLAGS
"${CMAKE_REQUIRED_FLAGS} -flto -c -Wno-unused-command-line-argument")
endif()
endif()

# Determine if test target triple is specified explicitly, and doesn't match the
# default.
if(NOT COMPILER_RT_DEFAULT_TARGET_TRIPLE STREQUAL LLVM_TARGET_TRIPLE)
Expand Down Expand Up @@ -475,6 +496,10 @@ function(get_compiler_rt_target arch variable)
endif()
endif()
set(target "${arch}${triple_suffix}")
elseif("${arch}" MATCHES "^amdgcn")
set(target "amdgcn-amd-amdhsa")
elseif("${arch}" MATCHES "^nvptx")
set(target "nvptx64-nvidia-cuda")
else()
set(target "${arch}${triple_suffix}")
endif()
Expand Down
7 changes: 7 additions & 0 deletions compiler-rt/cmake/base-config-ix.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ macro(test_targets)
test_target_arch(x86_64 "" "")
endif()
endif()
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
test_target_arch(amdgcn "" "--target=amdgcn-amd-amdhsa" "-nogpulib"
"-flto" "-fconvergent-functions"
"-Xclang -mcode-object-version=none")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "loongarch64")
test_target_arch(loongarch64 "" "")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "powerpc64le|ppc64le")
Expand Down Expand Up @@ -254,6 +258,9 @@ macro(test_targets)
test_target_arch(mips "" "-mips32r2" "-mabi=32" "-D_LARGEFILE_SOURCE=1" "-D_FILE_OFFSET_BITS=64")
test_target_arch(mips64 "" "-mips64r2" "-mabi=64")
endif()
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
test_target_arch(nvptx64 "" "--nvptx64-nvidia-cuda" "-nogpulib" "-flto"
"-fconvergent-functions" "-c")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "arm")
if(WIN32)
test_target_arch(arm "" "" "")
Expand Down
13 changes: 10 additions & 3 deletions compiler-rt/cmake/builtin-config-ix.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ builtin_check_c_compiler_flag(-fno-profile-generate COMPILER_RT_HAS_FNO_PROFILE_
builtin_check_c_compiler_flag(-fno-profile-instr-generate COMPILER_RT_HAS_FNO_PROFILE_INSTR_GENERATE_FLAG)
builtin_check_c_compiler_flag(-fno-profile-instr-use COMPILER_RT_HAS_FNO_PROFILE_INSTR_USE_FLAG)
builtin_check_c_compiler_flag(-Wno-pedantic COMPILER_RT_HAS_WNO_PEDANTIC)
builtin_check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG)
builtin_check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG)
builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG)
builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG)

Expand Down Expand Up @@ -52,6 +56,7 @@ else()
set(OS_NAME "${CMAKE_SYSTEM_NAME}")
endif()

set(AMDGPU amdgcn)
set(ARM64 aarch64)
set(ARM32 arm armhf armv4t armv5te armv6 armv6m armv7m armv7em armv7 armv7s armv7k armv8m.base armv8m.main armv8.1m.main)
set(AVR avr)
Expand All @@ -61,6 +66,7 @@ set(X86_64 x86_64)
set(LOONGARCH64 loongarch64)
set(MIPS32 mips mipsel)
set(MIPS64 mips64 mips64el)
set(NVPTX nvptx64)
set(PPC32 powerpc powerpcspe)
set(PPC64 powerpc64 powerpc64le)
set(RISCV32 riscv32)
Expand All @@ -78,8 +84,8 @@ if(APPLE)
endif()

set(ALL_BUILTIN_SUPPORTED_ARCH
${X86} ${X86_64} ${ARM32} ${ARM64} ${AVR}
${HEXAGON} ${MIPS32} ${MIPS64} ${PPC32} ${PPC64}
${X86} ${X86_64} ${AMDGPU} ${ARM32} ${ARM64} ${AVR}
${HEXAGON} ${MIPS32} ${MIPS64} ${NVPTX} ${PPC32} ${PPC64}
${RISCV32} ${RISCV64} ${SPARC} ${SPARCV9}
${WASM32} ${WASM64} ${VE} ${LOONGARCH64})

Expand Down Expand Up @@ -245,7 +251,8 @@ else()
${ALL_BUILTIN_SUPPORTED_ARCH})
endif()

if (OS_NAME MATCHES "Linux|SerenityOS" AND NOT LLVM_USE_SANITIZER)
if(OS_NAME MATCHES "Linux|SerenityOS" AND NOT LLVM_USE_SANITIZER AND NOT
COMPILER_RT_GPU_BUILD)
set(COMPILER_RT_HAS_CRT TRUE)
else()
set(COMPILER_RT_HAS_CRT FALSE)
Expand Down
18 changes: 18 additions & 0 deletions compiler-rt/cmake/caches/GPU.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# This file sets up a CMakeCache for GPU builds of compiler-rt. This supports
# amdgcn and nvptx builds targeting the builtins library.

set(COMPILER_RT_INCLUDE_TESTS OFF CACHE BOOL "")
set(COMPILER_RT_HAS_SAFESTACK OFF CACHE BOOL "")

set(COMPILER_RT_BUILD_BUILTINS ON CACHE BOOL "")
set(COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "")
set(COMPILER_RT_BUILD_CRT OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_SANITIZERS OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_XRAY OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_LIBFUZZER OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_XRAY_NO_PREINIT OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_SCUDO_SANTDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "")
21 changes: 20 additions & 1 deletion compiler-rt/lib/builtins/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN
"Skip the atomic builtin (these should normally be provided by a shared library)"
On)

if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD)
if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD AND NOT COMPILER_RT_GPU_BUILD)
set(GENERIC_SOURCES
${GENERIC_SOURCES}
emutls.c
Expand Down Expand Up @@ -627,6 +627,8 @@ if (MINGW)
)
endif()

set(amdgcn_SOURCES ${GENERIC_SOURCES})

set(armv4t_SOURCES ${arm_min_SOURCES})
set(armv5te_SOURCES ${arm_min_SOURCES})
set(armv6_SOURCES ${arm_min_SOURCES})
Expand Down Expand Up @@ -706,6 +708,8 @@ set(mips64_SOURCES ${GENERIC_TF_SOURCES}
set(mips64el_SOURCES ${GENERIC_TF_SOURCES}
${mips_SOURCES})

set(nvptx64_SOURCES ${GENERIC_SOURCES})

set(powerpc_SOURCES ${GENERIC_SOURCES})

set(powerpcspe_SOURCES ${GENERIC_SOURCES})
Expand Down Expand Up @@ -811,6 +815,21 @@ else ()
endif()
endif()

# Directly targeting the GPU requires a few extra flags.
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding BUILTIN_CFLAGS)
append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib BUILTIN_CFLAGS)
append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto BUILTIN_CFLAGS)
append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG
-fconvergent-functions BUILTIN_CFLAGS)

# AMDGPU targets want to use a generic ABI.
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG
"SHELL:-Xclang -mcode-object-version=none" BUILTIN_CFLAGS)
endif()
endif()

set(BUILTIN_DEFS "")

if(COMPILER_RT_BUILTINS_HIDE_SYMBOLS)
Expand Down

0 comments on commit dad7442

Please sign in to comment.