Cuda runtime error in trt-llm gemm #91

yuanjiechen · 2023-09-16T09:27:19Z

环境

If applicable, please include the following:
CPU architecture: x86_64
GPU properties
GPU name: NVIDIA A10
Clock frequencies used: None
Libraries
TensorRT branch: 9.0.0
TensorRT LLM: 0.1.3
Cuda: 12.1.66
Cudnn: unknown
Container: registry.cn-hangzhou.aliyuncs.com/trt-hackathon/trt-hackathon:final_v1
NVIDIA driver version: 525.105.17
OS: Ubuntu 5.15.0-73

复现步骤：
进入trtllm根目录，cd tests/quantization
执行： python -m unittest test_smooth_quant_gemm.py TestSmoothQuantGemm.test_matmul
即可看到cuda runtime error：what(): [TensorRT-LLM Error][int8gemm Runner] Failed to run cutlass int8 gemm. Error: Error Internal
报错代码位于 ./3rdparty/cutlass/include/cutlass/gemm/device/gemm_universal_base.h 的 initialize 函数，cudaFuncSetAttribute 返回的 cudaError_t 为 1（即 cudaErrorInvalidValue），没有继续检查后续代码；更改一些参数重新编译后仍然报错。


// Prepares the operator state from `args` before a launch.
//
// Steps, in order:
//   1. Query the workspace size for `args`. If it is non-zero, `workspace`
//      must be non-null; in GemmUniversalMode::kGemm the workspace is zeroed
//      asynchronously on `stream`.
//   2. Compute the grid shape / per-slice K size and build `params_`.
//   3. If the kernel's SharedStorage is at least 48 KiB, raise the kernel's
//      dynamic shared memory limit via cudaFuncSetAttribute().
//
// Returns:
//   Status::kSuccess             on success.
//   Status::kErrorWorkspaceNull  if a workspace is required but `workspace` is null.
//   Status::kErrorInternal       if cudaMemsetAsync() or cudaFuncSetAttribute() fails.
//
// Every CUDA failure path emits a CUTLASS_TRACE_HOST message carrying the
// CUDA error string, so a bare "Error Internal" can be traced to its cause
// when host tracing is enabled.
Status initialize(Arguments const &args, void *workspace = nullptr, cudaStream_t stream = nullptr) {

  CUTLASS_TRACE_HOST("GemmUniversalBase::initialize() - workspace " 
    << workspace << ", stream: " << (stream ? "non-null" : "null"));

  size_t workspace_bytes = get_workspace_size(args);

  CUTLASS_TRACE_HOST("  workspace_bytes: " << workspace_bytes);

  if (workspace_bytes) {

    if (!workspace) {
      CUTLASS_TRACE_HOST("  error: device workspace must not be null");

      return Status::kErrorWorkspaceNull;
    }

    // Only the plain GEMM mode clears the workspace here.
    if (args.mode == GemmUniversalMode::kGemm) {
      CUTLASS_TRACE_HOST("  clearing device workspace");
      cudaError_t result = cudaMemsetAsync(workspace, 0, workspace_bytes, stream);

      if (result != cudaSuccess) {
        CUTLASS_TRACE_HOST("  cudaMemsetAsync() returned error " << cudaGetErrorString(result));

        return Status::kErrorInternal;
      }
    }
  }

  // Get CUDA grid shape
  cutlass::gemm::GemmCoord grid_tiled_shape;
  int gemm_k_size = 0;

  get_grid_shape_(grid_tiled_shape, gemm_k_size, args);

  // Initialize the Params structure
  params_ = typename GemmKernel::Params(
    args,
    grid_tiled_shape,
    gemm_k_size,
    static_cast<int *>(workspace)
  );

  // Specify shared memory capacity for kernel.
  int smem_size = int(sizeof(typename GemmKernel::SharedStorage));

  // At 48 KiB and above the kernel must opt in to the larger dynamic
  // shared memory size before it can be launched with that much.
  if (smem_size >= (48 << 10)) {
    cudaError_t result = cudaFuncSetAttribute(Kernel<GemmKernel>,
                                  cudaFuncAttributeMaxDynamicSharedMemorySize,
                                  smem_size);

    if (result != cudaSuccess) {
      // Report the failure like the cudaMemsetAsync() path does, and include
      // the requested size so an over-limit request is visible in the trace.
      CUTLASS_TRACE_HOST("  cudaFuncSetAttribute() returned error "
        << cudaGetErrorString(result) << " (requested smem_size: " << smem_size << ")");

      return Status::kErrorInternal;
    }
  }

  return Status::kSuccess;
}

Tasks

Give feedback

No tasks being tracked yet.

Options

The text was updated successfully, but these errors were encountered:

foolerlyy · 2024-01-09T12:31:42Z

把CUTLASS_TRACE_HOST这个宏打开，详细看一下错误位置

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Cuda runtime error in trt-llm gemm #91

Cuda runtime error in trt-llm gemm #91

yuanjiechen commented Sep 16, 2023 •

edited

Loading

Tasks

foolerlyy commented Jan 9, 2024

Cuda runtime error in trt-llm gemm #91

Cuda runtime error in trt-llm gemm #91

Comments

yuanjiechen commented Sep 16, 2023 • edited Loading

Tasks

foolerlyy commented Jan 9, 2024

yuanjiechen commented Sep 16, 2023 •

edited

Loading