[libc] Add Kernel Resource Usage to nvptx-loader #97503

jameshu15869 · 2024-07-03T01:05:24Z

This PR allows nvptx-loader to read the resource usage of _start, _begin, and _end when executing CUDA binaries.

Example output:

$ nvptx-loader --print-resource-usage libc/benchmarks/gpu/src/ctype/libc.benchmarks.gpu.src.ctype.isalnum_benchmark.__build__
[ RUN      ] LlvmLibcIsAlNumGpuBenchmark.IsAlnumWrapper
[       OK ] LlvmLibcIsAlNumGpuBenchmark.IsAlnumWrapper: 93 cycles, 76 min, 470 max, 23 iterations, 78000 ns, 80 stddev
_begin registers: 25
_start registers: 80
_end registers: 62

llvmbot · 2024-07-03T01:05:54Z

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-libc

Author: None (jameshu15869)

Changes

This PR allows nvptx-loader to read the resource usage of _start, _begin, and _end when executing CUDA binaries.

Example output:

$ nvptx-loader --print-resource-usage libc/benchmarks/gpu/src/ctype/libc.benchmarks.gpu.src.ctype.isalnum_benchmark.__build__
[ RUN      ] LlvmLibcIsAlNumGpuBenchmark.IsAlnumWrapper
[       OK ] LlvmLibcIsAlNumGpuBenchmark.IsAlnumWrapper: 93 cycles, 76 min, 470 max, 23 iterations, 78000 ns, 80 stddev
_begin registers: 25
_start registers: 80
_end registers: 62

Full diff: https://github.com/llvm/llvm-project/pull/97503.diff

5 Files Affected:

(modified) libc/benchmarks/gpu/CMakeLists.txt (+3)
(modified) libc/cmake/modules/LLVMLibCTestRules.cmake (+10)
(modified) libc/utils/gpu/loader/Loader.h (+1)
(modified) libc/utils/gpu/loader/Main.cpp (+5-1)
(modified) libc/utils/gpu/loader/nvptx/Loader.cpp (+18)

diff --git a/libc/benchmarks/gpu/CMakeLists.txt b/libc/benchmarks/gpu/CMakeLists.txt
index d167abcaf2db1..4790e55bec478 100644
--- a/libc/benchmarks/gpu/CMakeLists.txt
+++ b/libc/benchmarks/gpu/CMakeLists.txt
@@ -19,9 +19,12 @@ function(add_benchmark benchmark_name)
     LINK_LIBRARIES
       LibcGpuBenchmark.hermetic
       ${BENCHMARK_LINK_LIBRARIES}
+    LOADER_ARGS
+      "--print-resource-usage"
     ${BENCHMARK_UNPARSED_ARGUMENTS}
   )
   get_fq_target_name(${benchmark_name} fq_target_name)
+
   add_dependencies(gpu-benchmark ${fq_target_name})
 endfunction(add_benchmark)
 
diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake
index fbeec32883b63..52bc2ad03a30d 100644
--- a/libc/cmake/modules/LLVMLibCTestRules.cmake
+++ b/libc/cmake/modules/LLVMLibCTestRules.cmake
@@ -709,12 +709,22 @@ function(add_libc_hermetic test_name)
       $<TARGET_FILE:${fq_build_target_name}> ${HERMETIC_TEST_ARGS})
   add_custom_target(
     ${fq_target_name}
+    DEPENDS ${fq_target_name}-cmd
+  )
+
+  add_custom_command(
+    OUTPUT ${fq_target_name}-cmd
     COMMAND ${test_cmd}
     COMMAND_EXPAND_LISTS
     COMMENT "Running hermetic test ${fq_target_name}"
     ${LIBC_HERMETIC_TEST_JOB_POOL}
   )
 
+  set_source_files_properties(${fq_target_name}-cmd
+    PROPERTIES
+      SYMBOLIC "TRUE"
+  )
+
   add_dependencies(${HERMETIC_TEST_SUITE} ${fq_target_name})
   if(NOT ${HERMETIC_TEST_IS_BENCHMARK})
     # If it is a benchmark, it will already have been added to the
diff --git a/libc/utils/gpu/loader/Loader.h b/libc/utils/gpu/loader/Loader.h
index eae2776b2773f..f576c58d902a1 100644
--- a/libc/utils/gpu/loader/Loader.h
+++ b/libc/utils/gpu/loader/Loader.h
@@ -28,6 +28,7 @@ struct LaunchParameters {
   uint32_t num_blocks_x;
   uint32_t num_blocks_y;
   uint32_t num_blocks_z;
+  bool print_resource_usage;
 };
 
 /// The arguments to the '_begin' kernel.
diff --git a/libc/utils/gpu/loader/Main.cpp b/libc/utils/gpu/loader/Main.cpp
index b711ec91c9f30..dfaee4d857826 100644
--- a/libc/utils/gpu/loader/Main.cpp
+++ b/libc/utils/gpu/loader/Main.cpp
@@ -20,7 +20,8 @@
 
 int main(int argc, char **argv, char **envp) {
   if (argc < 2) {
-    printf("USAGE: ./loader [--threads <n>, --blocks <n>] <device_image> "
+    printf("USAGE: ./loader [--threads <n>, --blocks <n>, "
+           "--print-resource-usage] <device_image> "
            "<args>, ...\n");
     return EXIT_SUCCESS;
   }
@@ -62,6 +63,9 @@ int main(int argc, char **argv, char **envp) {
           offset + 1 < argc ? strtoul(argv[offset + 1], &ptr, 10) : 1;
       offset++;
       continue;
+    } else if (argv[offset] == std::string("--print-resource-usage")) {
+      params.print_resource_usage = true;
+      continue;
     } else {
       file = fopen(argv[offset], "r");
       if (!file) {
diff --git a/libc/utils/gpu/loader/nvptx/Loader.cpp b/libc/utils/gpu/loader/nvptx/Loader.cpp
index 012cb778ecf15..90e52ddb008da 100644
--- a/libc/utils/gpu/loader/nvptx/Loader.cpp
+++ b/libc/utils/gpu/loader/nvptx/Loader.cpp
@@ -229,6 +229,17 @@ CUresult launch_kernel(CUmodule binary, CUstream stream,
   return CUDA_SUCCESS;
 }
 
+void print_resource_usage(CUmodule binary, const char *kernel_name) {
+  CUfunction function;
+  if (CUresult err = cuModuleGetFunction(&function, binary, kernel_name))
+    handle_error(err);
+  int num_regs;
+  if (CUresult err =
+          cuFuncGetAttribute(&num_regs, CU_FUNC_ATTRIBUTE_NUM_REGS, function))
+    handle_error(err);
+  fprintf(stderr, "%6s registers: %d\n", kernel_name, num_regs);
+}
+
 int load(int argc, char **argv, char **envp, void *image, size_t size,
          const LaunchParameters &params) {
   if (CUresult err = cuInit(0))
@@ -341,6 +352,13 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
   if (CUresult err = cuStreamSynchronize(stream))
     handle_error(err);
 
+  // Print resource usage if requested.
+  if (params.print_resource_usage) {
+    print_resource_usage(binary, "_begin");
+    print_resource_usage(binary, "_start");
+    print_resource_usage(binary, "_end");
+  }
+
   end_args_t fini_args = {host_ret};
   if (CUresult err = launch_kernel(binary, stream, rpc_device,
                                    single_threaded_params, "_end", fini_args))

jhuber6 · 2024-07-03T01:07:28Z

libc/utils/gpu/loader/Loader.h

@@ -28,6 +28,7 @@ struct LaunchParameters {
  uint32_t num_blocks_x;
  uint32_t num_blocks_y;
  uint32_t num_blocks_z;
+  bool print_resource_usage;


This isn't a launch parameter; it should just be a separate option. Honestly, I'm wondering if I should just port this to use the LLVM command-line handling now that it's always built with LLVM.

Ah, so you mean that checking resource usage shouldn't require launching a kernel, right?

No, launch_parameters are specifically arguments to the kernel launch. The API expects stuff like the number of threads, blocks, dynamic memory, the stream, etc. This isn't related to that since the API doesn't use it.

Ah, that makes sense. I can move the resource usage logic out of the kernel launch logic.

Move this to a separate global and pass it to the function. Honestly, I'm thinking I should just rewrite this to be a pure LLVM utility and use LLVM's command-line interface (https://clang.llvm.org/docs/ClangCommandLineReference.html).

libc/utils/gpu/loader/nvptx/Loader.cpp

jhuber6 · 2024-07-03T01:08:27Z

libc/utils/gpu/loader/nvptx/Loader.cpp

@@ -341,6 +352,13 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
  if (CUresult err = cuStreamSynchronize(stream))
    handle_error(err);

+  // Print resource usage if requested.


Just make it do this when it's called.

jhuber6 · 2024-07-14T23:03:28Z

libc/benchmarks/gpu/CMakeLists.txt

+  # We want to dump kernel resource usage for GPU benchmarks
+  get_target_property(gpu_loader_exe libc.utils.gpu.loader "EXECUTABLE")
+  set(res_usage_cmd $<$<BOOL:${LIBC_TARGET_OS_IS_GPU}>:${gpu_loader_exe}>
+    ${CMAKE_CROSSCOMPILING_EMULATOR}


What does this do here? I was actually looking into working with this at some point, since it will allow me to test the other targets in the future.

I think I misinterpreted what we talked about before and made nvptx-loader --print-resource-usage only print the resource usage and skip running the kernel. This was a second nvptx-loader command that you run after launching the kernel, just to see the register usage.

jhuber6 · 2024-07-14T23:04:38Z

libc/utils/gpu/loader/Loader.h

@@ -28,6 +28,7 @@ struct LaunchParameters {
  uint32_t num_blocks_x;
  uint32_t num_blocks_y;
  uint32_t num_blocks_z;
+  bool print_resource_usage;


Move this to a separate global and pass it to the function. Honestly, I'm thinking I should just rewrite this to be a pure LLVM utility and use LLVM's command-line interface (https://clang.llvm.org/docs/ClangCommandLineReference.html).

jhuber6 · 2024-07-14T23:05:18Z

libc/utils/gpu/loader/amdgpu/Loader.cpp

@@ -326,6 +326,11 @@ static hsa_status_t hsa_memcpy(void *dst, hsa_agent_t dst_agent,
  return HSA_STATUS_SUCCESS;
 }

+void print_resources(void *image) {
+  fprintf(stderr, "Printing resource usage on AMDGPU is not supported yet.\n");
+  exit(EXIT_FAILURE);


Don't make it fail, it'll just make it more annoying when we try to run benchmarks on both.

jhuber6 · 2024-07-14T23:06:26Z

libc/utils/gpu/loader/nvptx/Loader.cpp

+  printf("%6s registers: %d\n", kernel_name, num_regs);
+}
+
+void print_resources(void *image) {


I think this function should just print something like

Executing kernel <name>: blah blah blah

And that logic is guarded in the launch_kernel function. So, more of a diagnostic thing.

Ah, so you still want kernels to launch normally, right? I think I misinterpreted one of your other comments and made this a separate thing (e.g. nvptx-loader --print-resource-usage just prints the registers without running the kernel).

Yeah, I figured it would just be an option that the benchmarks use to print verbose information on kernel launch so the user can see it. However, it's definitely true that it's not objectively useful if it includes all the benchmarking overhead. So we may need some name arguments if someone wants to make a dummy kernel or something.

Similar to that, do we want benchmarks to always print resource usage? Or should we expect users to manually call nvptx-loader to see the kernel register usage?

We can just make them always print the information.

jhuber6 · 2024-07-15T01:39:28Z

libc/cmake/modules/LLVMLibCTestRules.cmake

@@ -553,7 +553,7 @@ function(add_libc_hermetic test_name)
  endif()
  cmake_parse_arguments(
    "HERMETIC_TEST"
-    "IS_BENCHMARK" # Optional arguments
+    "IS_GPU_BENCHMARK" # Optional arguments


Can we handle this with LOADER_ARGS and LINK_LIBRARIES?

libc/benchmarks/gpu/CMakeLists.txt

llvm-ci · 2024-07-17T21:19:39Z

LLVM Buildbot has detected a new failure on builder openmp-offload-libc-amdgpu-runtime running on omp-vega20-1 while building libc at step 5 "compile-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/2053

Here is the relevant piece of the build log for the reference:

Step 5 (compile-openmp) failure: build (failure)
...
0.532 [3626/14/592] Linking CXX static library lib/libLibcTableGenUtil.a
0.532 [3625/14/593] Linking CXX static library lib/libllvm_gtest_main.a
0.532 [3625/13/594] Linking CXX static library lib/libLLVMDebugInfoCodeView.a
0.608 [3625/12/595] Linking CXX executable bin/UnicodeNameMappingGenerator
0.618 [3625/11/596] Linking CXX executable bin/split-file
0.642 [3625/10/597] Linking CXX executable bin/not
0.644 [3625/9/598] Linking CXX executable bin/reduce-chunk-list
0.648 [3625/8/599] Linking CXX executable bin/yaml-bench
0.668 [3625/7/600] Linking CXX executable bin/llvm-undname
0.675 [3625/6/601] Building CXX object projects/libc/utils/gpu/loader/amdgpu/CMakeFiles/amdhsa-loader.dir/Loader.cpp.o
FAILED: projects/libc/utils/gpu/loader/amdgpu/CMakeFiles/amdhsa-loader.dir/Loader.cpp.o 
ccache /usr/bin/c++ -DLIBC_COPT_ARRAY_ARG_LIST -DLIBC_COPT_PRINTF_DISABLE_INDEX_MODE -DLIBC_COPT_PRINTF_DISABLE_WRITE_INT -DLIBC_COPT_USE_C_ASSERT -DLIBC_NAMESPACE=__llvm_libc_19_0_0_git -I/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.build/projects/libc/utils/gpu/loader/amdgpu -I/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/amdgpu -I/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader -I/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/include -I/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc -I/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/server -isystem /opt/rocm/include -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -fno-lifetime-dse -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wimplicit-fallthrough -Wno-uninitialized -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -DNDEBUG -UNDEBUG -Wno-attributes -std=c++17 -MD -MT projects/libc/utils/gpu/loader/amdgpu/CMakeFiles/amdhsa-loader.dir/Loader.cpp.o -MF projects/libc/utils/gpu/loader/amdgpu/CMakeFiles/amdhsa-loader.dir/Loader.cpp.o.d -o projects/libc/utils/gpu/loader/amdgpu/CMakeFiles/amdhsa-loader.dir/Loader.cpp.o -c /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/amdgpu/Loader.cpp
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/amdgpu/Loader.cpp: In function ‘void print_kernel_resources(char*)’:
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/amdgpu/Loader.cpp:129:11: error: cannot convert ‘const char*’ to ‘FILE*’ {aka ‘_IO_FILE*’}
  129 |   fprintf("Kernel resources on AMDGPU is not supported yet.\n");
      |           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      |           |
      |           const char*
In file included from /usr/include/stdio.h:867,
                 from /usr/include/c++/9/cstdio:42,
                 from /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/Loader.h:19,
                 from /home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/amdgpu/Loader.cpp:16:
/usr/include/x86_64-linux-gnu/bits/stdio2.h:98:27: note:   initializing argument 1 of ‘int fprintf(FILE*, const char*, ...)’
   98 | fprintf (FILE *__restrict __stream, const char *__restrict __fmt, ...)
      |          ~~~~~~~~~~~~~~~~~^~~~~~~~
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/amdgpu/Loader.cpp: In instantiation of ‘hsa_status_t launch_kernel(hsa_agent_t, hsa_executable_t, hsa_amd_memory_pool_t, hsa_amd_memory_pool_t, hsa_queue_t*, rpc_device_t, const LaunchParameters&, const char*, args_t, bool) [with args_t = begin_args_t; hsa_agent_t = hsa_agent_s; hsa_executable_t = hsa_executable_s; hsa_amd_memory_pool_t = hsa_amd_memory_pool_s; hsa_queue_t = hsa_queue_s; rpc_device_t = rpc_device_s]’:
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/amdgpu/Loader.cpp:559:71:   required from here
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/amdgpu/Loader.cpp:279:28: error: invalid conversion from ‘const char*’ to ‘char*’ [-fpermissive]
  279 |     print_kernel_resources(kernel_name);
      |                            ^~~~~~~~~~~
      |                            |
      |                            const char*
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/amdgpu/Loader.cpp:128:35: note:   initializing argument 1 of ‘void print_kernel_resources(char*)’
  128 | void print_kernel_resources(char *kernel_name) {
      |                             ~~~~~~^~~~~~~~~~~
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/amdgpu/Loader.cpp: In instantiation of ‘hsa_status_t launch_kernel(hsa_agent_t, hsa_executable_t, hsa_amd_memory_pool_t, hsa_amd_memory_pool_t, hsa_queue_t*, rpc_device_t, const LaunchParameters&, const char*, args_t, bool) [with args_t = start_args_t; hsa_agent_t = hsa_agent_s; hsa_executable_t = hsa_executable_s; hsa_amd_memory_pool_t = hsa_amd_memory_pool_s; hsa_queue_t = hsa_queue_s; rpc_device_t = rpc_device_s]’:
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/amdgpu/Loader.cpp:565:66:   required from here
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/amdgpu/Loader.cpp:279:28: error: invalid conversion from ‘const char*’ to ‘char*’ [-fpermissive]
  279 |     print_kernel_resources(kernel_name);
      |                            ^~~~~~~~~~~
      |                            |
      |                            const char*
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/amdgpu/Loader.cpp:128:35: note:   initializing argument 1 of ‘void print_kernel_resources(char*)’
  128 | void print_kernel_resources(char *kernel_name) {
      |                             ~~~~~~^~~~~~~~~~~
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/amdgpu/Loader.cpp: In instantiation of ‘hsa_status_t launch_kernel(hsa_agent_t, hsa_executable_t, hsa_amd_memory_pool_t, hsa_amd_memory_pool_t, hsa_queue_t*, rpc_device_t, const LaunchParameters&, const char*, args_t, bool) [with args_t = end_args_t; hsa_agent_t = hsa_agent_s; hsa_executable_t = hsa_executable_s; hsa_amd_memory_pool_t = hsa_amd_memory_pool_s; hsa_queue_t = hsa_queue_s; rpc_device_t = rpc_device_s]’:
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/amdgpu/Loader.cpp:586:71:   required from here
/home/ompworker/bbot/openmp-offload-libc-amdgpu-runtime/llvm.src/libc/utils/gpu/loader/amdgpu/Loader.cpp:279:28: error: invalid conversion from ‘const char*’ to ‘char*’ [-fpermissive]
  279 |     print_kernel_resources(kernel_name);

This PR allows `nvptx-loader` to read the resource usage of `_start`, `_begin`, and `_end` when executing CUDA binaries. Example output: ``` $ nvptx-loader --print-resource-usage libc/benchmarks/gpu/src/ctype/libc.benchmarks.gpu.src.ctype.isalnum_benchmark.__build__ [ RUN ] LlvmLibcIsAlNumGpuBenchmark.IsAlnumWrapper [ OK ] LlvmLibcIsAlNumGpuBenchmark.IsAlnumWrapper: 93 cycles, 76 min, 470 max, 23 iterations, 78000 ns, 80 stddev _begin registers: 25 _start registers: 80 _end registers: 62 ``` --------- Co-authored-by: Joseph Huber <[email protected]>

Summary: This PR allows `nvptx-loader` to read the resource usage of `_start`, `_begin`, and `_end` when executing CUDA binaries. Example output: ``` $ nvptx-loader --print-resource-usage libc/benchmarks/gpu/src/ctype/libc.benchmarks.gpu.src.ctype.isalnum_benchmark.__build__ [ RUN ] LlvmLibcIsAlNumGpuBenchmark.IsAlnumWrapper [ OK ] LlvmLibcIsAlNumGpuBenchmark.IsAlnumWrapper: 93 cycles, 76 min, 470 max, 23 iterations, 78000 ns, 80 stddev _begin registers: 25 _start registers: 80 _end registers: 62 ``` --------- Co-authored-by: Joseph Huber <[email protected]> Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251005

llvmbot added the libc label Jul 3, 2024

jhuber6 reviewed Jul 3, 2024

View reviewed changes

llvmbot added the backend:AMDGPU label Jul 6, 2024

jhuber6 reviewed Jul 14, 2024

View reviewed changes

jameshu15869 added 4 commits July 14, 2024 21:10

support basic resource usage using cuobjdump

7b19e59

add resource usage to nvptx-loader

a49e872

printing resource usage should be separate from launching kernels

5d4c84f

print kernel resources when launching kernels

34ae76a

jameshu15869 force-pushed the nvptx-resource-usage branch from 33d986a to 34ae76a Compare July 15, 2024 01:37

jhuber6 approved these changes Jul 17, 2024

View reviewed changes

jhuber6 reviewed Jul 17, 2024

View reviewed changes

libc/benchmarks/gpu/CMakeLists.txt Outdated Show resolved Hide resolved

Update libc/benchmarks/gpu/CMakeLists.txt

2133a17

jhuber6 merged commit 1ecffda into llvm:main Jul 17, 2024
4 of 5 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[libc] Add Kernel Resource Usage to nvptx-loader #97503

[libc] Add Kernel Resource Usage to nvptx-loader #97503

jameshu15869 commented Jul 3, 2024

llvmbot commented Jul 3, 2024 •

edited

Loading

jhuber6 Jul 3, 2024

jameshu15869 Jul 6, 2024

jhuber6 Jul 6, 2024

jameshu15869 Jul 6, 2024

jhuber6 Jul 14, 2024

jhuber6 Jul 3, 2024

jhuber6 Jul 14, 2024

jameshu15869 Jul 15, 2024

jhuber6 Jul 14, 2024

jhuber6 Jul 14, 2024

jhuber6 Jul 14, 2024

jameshu15869 Jul 15, 2024

jhuber6 Jul 15, 2024

jameshu15869 Jul 15, 2024

jhuber6 Jul 15, 2024

jhuber6 Jul 15, 2024

llvm-ci commented Jul 17, 2024

[libc] Add Kernel Resource Usage to nvptx-loader #97503

[libc] Add Kernel Resource Usage to nvptx-loader #97503

Conversation

jameshu15869 commented Jul 3, 2024

llvmbot commented Jul 3, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

llvm-ci commented Jul 17, 2024

llvmbot commented Jul 3, 2024 •

edited

Loading