diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index 9d1b39143016b..1070627d5e7da 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -291,12 +291,50 @@ if (onnxruntime_USE_ROCM)
     message(FATAL_ERROR "ROCM does not support build with CUDA!")
   endif()
 
+  # replicate strategy used by pytorch to get ROCM_VERSION
+  # https://github.com/pytorch/pytorch/blob/5c5b71b6eebae76d744261715231093e62f0d090/cmake/public/LoadHIP.cmake
+  # with modification
+  if (EXISTS "${onnxruntime_ROCM_HOME}/.info/version")
+    message("\n***** ROCm version from ${onnxruntime_ROCM_HOME}/.info/version ****\n")
+    file(READ "${onnxruntime_ROCM_HOME}/.info/version" ROCM_VERSION_DEV_RAW)
+    string(REGEX MATCH "^([0-9]+)\.([0-9]+)\.([0-9]+)-.*$" ROCM_VERSION_MATCH ${ROCM_VERSION_DEV_RAW})
+  elseif (EXISTS "${onnxruntime_ROCM_HOME}/include/rocm_version.h")
+    message("\n***** ROCm version from ${onnxruntime_ROCM_HOME}/include/rocm_version.h ****\n")
+    file(READ "${onnxruntime_ROCM_HOME}/include/rocm_version.h" ROCM_VERSION_H_RAW)
+    string(REGEX MATCH "\"([0-9]+)\.([0-9]+)\.([0-9]+).*\"" ROCM_VERSION_MATCH ${ROCM_VERSION_H_RAW})
+  elseif (EXISTS "${onnxruntime_ROCM_HOME}/include/rocm-core/rocm_version.h")
+    message("\n***** ROCm version from ${onnxruntime_ROCM_HOME}/include/rocm-core/rocm_version.h ****\n")
+    file(READ "${onnxruntime_ROCM_HOME}/include/rocm-core/rocm_version.h" ROCM_VERSION_H_RAW)
+    string(REGEX MATCH "\"([0-9]+)\.([0-9]+)\.([0-9]+).*\"" ROCM_VERSION_MATCH ${ROCM_VERSION_H_RAW})
+  endif()
+
+  if (ROCM_VERSION_MATCH)
+    set(ROCM_VERSION_DEV_MAJOR ${CMAKE_MATCH_1})
+    set(ROCM_VERSION_DEV_MINOR ${CMAKE_MATCH_2})
+    set(ROCM_VERSION_DEV_PATCH ${CMAKE_MATCH_3})
+    set(ROCM_VERSION_DEV "${ROCM_VERSION_DEV_MAJOR}.${ROCM_VERSION_DEV_MINOR}.${ROCM_VERSION_DEV_PATCH}")
+    math(EXPR ROCM_VERSION_DEV_INT "(${ROCM_VERSION_DEV_MAJOR}*10000) + (${ROCM_VERSION_DEV_MINOR}*100) + ${ROCM_VERSION_DEV_PATCH}")
+
+    message("ROCM_VERSION_DEV: ${ROCM_VERSION_DEV}")
+    message("ROCM_VERSION_DEV_MAJOR: ${ROCM_VERSION_DEV_MAJOR}")
+    message("ROCM_VERSION_DEV_MINOR: ${ROCM_VERSION_DEV_MINOR}")
+    message("ROCM_VERSION_DEV_PATCH: ${ROCM_VERSION_DEV_PATCH}")
+    message("ROCM_VERSION_DEV_INT: ${ROCM_VERSION_DEV_INT}")
+  else()
+    message(FATAL_ERROR "Cannot determine ROCm version string")
+  endif()
+
+
   if (NOT CMAKE_HIP_COMPILER)
     set(CMAKE_HIP_COMPILER "${onnxruntime_ROCM_HOME}/llvm/bin/clang++")
   endif()
 
   if (NOT CMAKE_HIP_ARCHITECTURES)
-    set(CMAKE_HIP_ARCHITECTURES "gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942;gfx1200;gfx1201")
+    if (ROCM_VERSION_DEV VERSION_LESS "6.2")
+      message(FATAL_ERROR "CMAKE_HIP_ARCHITECTURES is not set when ROCm version < 6.2")
+    else()
+      set(CMAKE_HIP_ARCHITECTURES "gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942;gfx1200;gfx1201")
+    endif()
   endif()
 
   file(GLOB rocm_cmake_components ${onnxruntime_ROCM_HOME}/lib/cmake/*)
@@ -328,35 +366,6 @@ if (onnxruntime_USE_ROCM)
     set(onnxruntime_HIPIFY_PERL ${HIPIFY_PERL_PATH}/hipify-perl)
   endif()
 
-  # replicate strategy used by pytorch to get ROCM_VERSION
-  # https://github.com/pytorch/pytorch/blob/5c5b71b6eebae76d744261715231093e62f0d090/cmake/public/LoadHIP.cmake
-  # with modification
-  if (EXISTS "${onnxruntime_ROCM_HOME}/.info/version")
-    file(READ "${onnxruntime_ROCM_HOME}/.info/version" ROCM_VERSION_DEV_RAW)
-    string(REGEX MATCH "^([0-9]+)\.([0-9]+)\.([0-9]+)-.*$" ROCM_VERSION_MATCH ${ROCM_VERSION_DEV_RAW})
-  elseif (EXISTS "${onnxruntime_ROCM_HOME}/include/rocm_version.h")
-    file(READ "${onnxruntime_ROCM_HOME}/include/rocm_version.h" ROCM_VERSION_H_RAW)
-    string(REGEX MATCH "\"([0-9]+)\.([0-9]+)\.([0-9]+).*\"" ROCM_VERSION_MATCH ${ROCM_VERSION_H_RAW})
-  elseif (EXISTS "${onnxruntime_ROCM_HOME}/include/rocm-core/rocm_version.h")
-    file(READ "${onnxruntime_ROCM_HOME}/include/rocm-core/rocm_version.h" ROCM_VERSION_H_RAW)
-    string(REGEX MATCH "\"([0-9]+)\.([0-9]+)\.([0-9]+).*\"" ROCM_VERSION_MATCH ${ROCM_VERSION_H_RAW})
-  endif()
-
-  if (ROCM_VERSION_MATCH)
-    set(ROCM_VERSION_DEV_MAJOR ${CMAKE_MATCH_1})
-    set(ROCM_VERSION_DEV_MINOR ${CMAKE_MATCH_2})
-    set(ROCM_VERSION_DEV_PATCH ${CMAKE_MATCH_3})
-    set(ROCM_VERSION_DEV "${ROCM_VERSION_DEV_MAJOR}.${ROCM_VERSION_DEV_MINOR}.${ROCM_VERSION_DEV_PATCH}")
-    math(EXPR ROCM_VERSION_DEV_INT "(${ROCM_VERSION_DEV_MAJOR}*10000) + (${ROCM_VERSION_DEV_MINOR}*100) + ${ROCM_VERSION_DEV_PATCH}")
-  else()
-    message(FATAL_ERROR "Cannot determine ROCm version string")
-  endif()
-  message("\n***** ROCm version from ${onnxruntime_ROCM_HOME}/.info/version ****\n")
-  message("ROCM_VERSION_DEV: ${ROCM_VERSION_DEV}")
-  message("ROCM_VERSION_DEV_MAJOR: ${ROCM_VERSION_DEV_MAJOR}")
-  message("ROCM_VERSION_DEV_MINOR: ${ROCM_VERSION_DEV_MINOR}")
-  message("ROCM_VERSION_DEV_PATCH: ${ROCM_VERSION_DEV_PATCH}")
-  message("ROCM_VERSION_DEV_INT: ${ROCM_VERSION_DEV_INT}")
   message("\n***** HIP LANGUAGE CONFIG INFO ****\n")
   message("CMAKE_HIP_COMPILER: ${CMAKE_HIP_COMPILER}")
   message("CMAKE_HIP_ARCHITECTURES: ${CMAKE_HIP_ARCHITECTURES}")
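The `math(EXPR ROCM_VERSION_DEV_INT ...)` line above folds the parsed version triple into a single comparable integer (major·10000 + minor·100 + patch), so ROCm 6.2.3 becomes 60203. A minimal C++ sketch of the same encoding; `RocmVersionInt` is illustrative, not part of the patch:

```cpp
// Mirrors the CMake arithmetic: the encoding orders correctly as long as
// minor and patch stay below 100.
constexpr int RocmVersionInt(int major, int minor, int patch) {
  return major * 10000 + minor * 100 + patch;
}

static_assert(RocmVersionInt(6, 2, 3) == 60203, "ROCm 6.2.3 -> 60203");
static_assert(RocmVersionInt(6, 2, 3) > RocmVersionInt(6, 1, 3), "ordering holds");
```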
diff --git a/dockerfiles/Dockerfile.migraphx b/dockerfiles/Dockerfile.migraphx
index c3541a8bd3425..c5d998d503899 100644
--- a/dockerfiles/Dockerfile.migraphx
+++ b/dockerfiles/Dockerfile.migraphx
@@ -5,7 +5,7 @@
 # Dockerfile to run ONNXRuntime with MIGraphX integration
 #--------------------------------------------------------------------------
 
-FROM rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1
+FROM rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0
 
 ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
 ARG ONNXRUNTIME_BRANCH=main
diff --git a/dockerfiles/Dockerfile.rocm b/dockerfiles/Dockerfile.rocm
index c242933f677f0..bef8d7a5f47d2 100644
--- a/dockerfiles/Dockerfile.rocm
+++ b/dockerfiles/Dockerfile.rocm
@@ -5,7 +5,7 @@
 # Dockerfile to run ONNXRuntime with ROCm integration
 #--------------------------------------------------------------------------
 
-FROM rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1
+FROM rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0
 
 ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
 ARG ONNXRUNTIME_BRANCH=main
diff --git a/dockerfiles/README.md b/dockerfiles/README.md
index 7825940571769..9f83fc390eee7 100644
--- a/dockerfiles/README.md
+++ b/dockerfiles/README.md
@@ -292,7 +292,7 @@ Nothing else from ONNX Runtime source tree will be copied/installed to the image
 Note: When running the container you built in Docker, please either use 'nvidia-docker' command instead of 'docker', or use Docker command-line options to make sure NVIDIA runtime will be used and appropriate files mounted from host. Otherwise, CUDA libraries won't be found. You can also [set NVIDIA runtime as default in Docker](https://github.com/dusty-nv/jetson-containers#docker-default-runtime).
 
 ## MIGraphX
-**Ubuntu 20.04, ROCm6.0, MIGraphX**
+**Ubuntu 22.04, ROCm6.2.3, MIGraphX**
 
 1. Build the docker image from the Dockerfile in this repository.
   ```
@@ -306,7 +306,7 @@ Note: When running the container you built in Docker, please either use 'nvidia-
   ```
 
 ## ROCm
-**Ubuntu 20.04, ROCm6.0**
+**Ubuntu 22.04, ROCm6.2.3**
 
 1. Build the docker image from the Dockerfile in this repository.
   ```
diff --git a/js/common/lib/tensor-impl.ts b/js/common/lib/tensor-impl.ts
index c0e1582c17de5..8feb8d7205fa1 100644
--- a/js/common/lib/tensor-impl.ts
+++ b/js/common/lib/tensor-impl.ts
@@ -179,7 +179,9 @@ export class Tensor implements TensorInterface {
         type !== 'uint64' &&
         type !== 'int8' &&
         type !== 'uint8' &&
-        type !== 'bool'
+        type !== 'bool' &&
+        type !== 'uint4' &&
+        type !== 'int4'
       ) {
         throw new TypeError(`unsupported type "${type}" to create tensor from MLTensor`);
       }
diff --git a/js/common/lib/tensor.ts b/js/common/lib/tensor.ts
index 17e2f4d37c91f..af918705b97e3 100644
--- a/js/common/lib/tensor.ts
+++ b/js/common/lib/tensor.ts
@@ -167,7 +167,9 @@ export declare namespace Tensor {
     | 'uint32'
     | 'int64'
     | 'uint64'
-    | 'bool';
+    | 'bool'
+    | 'uint4'
+    | 'int4';
 
   /**
    * represent where the tensor data is stored
diff --git a/js/web/lib/wasm/jsep/backend-webnn.ts b/js/web/lib/wasm/jsep/backend-webnn.ts
index 37eb0e0edc67c..47304fdc64ae4 100644
--- a/js/web/lib/wasm/jsep/backend-webnn.ts
+++ b/js/web/lib/wasm/jsep/backend-webnn.ts
@@ -25,6 +25,8 @@ const onnxDataTypeToWebnnDataType = new Map([
   [DataType.uint32, 'uint32'],
   [DataType.int64, 'int64'],
   [DataType.uint64, 'uint64'],
+  [DataType.int4, 'int4'],
+  [DataType.uint4, 'uint4'],
   [DataType.int8, 'int8'],
   [DataType.uint8, 'uint8'],
   [DataType.bool, 'uint8'],
@@ -214,6 +216,8 @@ export class WebNNBackend {
       case 'int8':
         bufferView = new Int8Array(buffer);
         break;
+      case 'int4':
+      case 'uint4':
       case 'uint8':
         bufferView = new Uint8Array(buffer);
         break;
diff --git a/js/web/lib/wasm/jsep/webnn/webnn.d.ts b/js/web/lib/wasm/jsep/webnn/webnn.d.ts
index a2d4e9af23e44..2620168738dac 100644
--- a/js/web/lib/wasm/jsep/webnn/webnn.d.ts
+++ b/js/web/lib/wasm/jsep/webnn/webnn.d.ts
@@ -28,7 +28,7 @@ interface MLContext {
 }
 interface MLGraph {}
 type MLInputOperandLayout = 'nchw'|'nhwc';
-type MLOperandDataType = 'float32'|'float16'|'int32'|'uint32'|'int64'|'uint64'|'int8'|'uint8';
+type MLOperandDataType = 'float32'|'float16'|'int32'|'uint32'|'int64'|'uint64'|'int8'|'uint8'|'int4'|'uint4';
 interface MLOperandDescriptor {
   dataType: MLOperandDataType;
   shape?: readonly number[];
diff --git a/js/web/lib/wasm/wasm-common.ts b/js/web/lib/wasm/wasm-common.ts
index ad2ff62587252..54071866be5c3 100644
--- a/js/web/lib/wasm/wasm-common.ts
+++ b/js/web/lib/wasm/wasm-common.ts
@@ -252,7 +252,9 @@ export const isMLTensorSupportedType = (type: Tensor.Type): type is Tensor.MLTen
   type === 'uint64' ||
   type === 'int8' ||
   type === 'uint8' ||
-  type === 'bool';
+  type === 'bool' ||
+  type === 'uint4' ||
+  type === 'int4';
 
 /**
  * Map string data location to integer value
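The hunks above (and the `model_builder.cc` change further down, which sizes buffers as `(num_elements + 1) / 2`) rely on the packed 4-bit layout ONNX and WebNN share: two elements per byte, first element in the low nibble, so n elements occupy ceil(n/2) bytes — which is why int4/uint4 data rides in a `Uint8Array`. A minimal sketch of that layout; the helper names are illustrative, not part of the patch:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Pack signed 4-bit values two per byte, low nibble first; n elements
// need (n + 1) / 2 bytes -- the same rounding used in RegisterInitializers().
std::vector<uint8_t> PackInt4(const std::vector<int8_t>& values) {
  std::vector<uint8_t> packed((values.size() + 1) / 2, 0);
  for (size_t i = 0; i < values.size(); ++i) {
    const uint8_t nibble = static_cast<uint8_t>(values[i]) & 0x0F;
    packed[i / 2] |= (i % 2 == 0) ? nibble : static_cast<uint8_t>(nibble << 4);
  }
  return packed;
}

// Recover element `index`, sign-extending the nibble's high bit.
int8_t UnpackInt4(const std::vector<uint8_t>& packed, size_t index) {
  const uint8_t nibble =
      (index % 2 == 0) ? (packed[index / 2] & 0x0F) : (packed[index / 2] >> 4);
  return static_cast<int8_t>(static_cast<uint8_t>(nibble << 4)) >> 4;
}
```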
diff --git a/onnxruntime/core/providers/migraphx/gpu_data_transfer.cc b/onnxruntime/core/providers/migraphx/gpu_data_transfer.cc
index 94480c308b99f..51625b83b8f61 100644
--- a/onnxruntime/core/providers/migraphx/gpu_data_transfer.cc
+++ b/onnxruntime/core/providers/migraphx/gpu_data_transfer.cc
@@ -57,7 +57,7 @@ common::Status GPUDataTransfer::CopyTensorAsync(const Tensor& src, Tensor& dst,
       HIP_CALL_THROW(hipMemcpyAsync(dst_data, src_data, bytes, hipMemcpyDeviceToDevice, static_cast<hipStream_t>(stream.GetHandle())));
     } else {
       // copy from other CPU memory to GPU, this is blocking
-      HIP_CALL_THROW(hipMemcpy(dst_data, src_data, bytes, hipMemcpyHostToDevice));
+      HIP_CALL_THROW(hipMemcpyWithStream(dst_data, src_data, bytes, hipMemcpyHostToDevice, static_cast<hipStream_t>(stream.GetHandle())));
     }
   } else if (src_device.Type() == OrtDevice::GPU) {
     HIP_CALL_THROW(hipMemcpyAsync(dst_data, src_data, bytes, hipMemcpyDeviceToHost, static_cast<hipStream_t>(stream.GetHandle())));
diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc
index e41cd577b0b21..dca38480434fe 100644
--- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc
+++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc
@@ -1445,7 +1445,11 @@ Status MIGraphXExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
           std::vector<int64_t> ort_shape{res_lens.begin(), res_lens.end()};
           auto output_tensor = ctx.GetOutput(i, ort_shape.data(), ort_shape.size());
           void* output_data = output_tensor.GetTensorMutableRawData();
-          HIP_CALL_THROW(hipMemcpy(output_data, gpu_res.data(), res_shape.bytes(), hipMemcpyDeviceToDevice));
+          HIP_CALL_THROW(hipMemcpyWithStream(output_data,
+                                             gpu_res.data(),
+                                             res_shape.bytes(),
+                                             hipMemcpyDeviceToDevice,
+                                             static_cast<hipStream_t>(rocm_stream)));
         }
       }
     };
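Both MIGraphX hunks swap `hipMemcpy` for `hipMemcpyWithStream`. The practical difference: `hipMemcpy` issues the copy on the NULL stream and acts as a device-wide synchronization point, serializing against in-flight work on other streams, whereas `hipMemcpyWithStream` orders the copy only against prior work on the caller's stream; the host still blocks until the copy completes. A sketch under the assumption of an already-created stream and valid buffers:

```cpp
#include <hip/hip_runtime.h>

// Illustrative only -- contrasts the two copy calls touched by this diff.
hipError_t CopyHostToDevice(void* dst, const void* src, size_t bytes,
                            hipStream_t stream) {
  // Old: hipMemcpy(dst, src, bytes, hipMemcpyHostToDevice) runs on the
  // NULL stream and implicitly synchronizes the whole device.
  //
  // New: the copy is ordered only against prior work on `stream`; other
  // streams keep running while the host waits for completion.
  return hipMemcpyWithStream(dst, src, bytes, hipMemcpyHostToDevice, stream);
}
```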
diff --git a/onnxruntime/core/providers/webnn/builders/helper.cc b/onnxruntime/core/providers/webnn/builders/helper.cc
index b90c7d76a6507..4b39e03ffc788 100644
--- a/onnxruntime/core/providers/webnn/builders/helper.cc
+++ b/onnxruntime/core/providers/webnn/builders/helper.cc
@@ -69,17 +69,16 @@ bool IsNodeSupported(const Node& node, const GraphViewer& graph_viewer, const We
   }
 }
 
-bool IsInputSupported(const NodeArg& input, const std::string& parent_name, const logging::Logger& logger) {
-  const auto& input_name = input.Name();
-  const auto* shape_proto = input.Shape();
+bool IsTensorShapeSupported(const NodeArg& node_arg, const std::string& parent_name, const logging::Logger& logger) {
+  const auto& node_arg_name = node_arg.Name();
+  const auto* shape_proto = node_arg.Shape();
   // Optional tensors can be indicated by an empty name, just ignore it.
-  if (input_name.empty()) {
+  if (node_arg_name.empty()) {
     return true;
   }
-  // We do not support input with no shape.
+  // We do not support input/output with no shape.
   if (!shape_proto) {
-    LOGS(logger, VERBOSE) << "Input [" << input_name << "] of [" << parent_name
-                          << "] has not shape";
+    LOGS(logger, VERBOSE) << "Node arg [" << node_arg_name << "] of [" << parent_name << "] has no shape";
     return false;
   }
 
@@ -87,8 +86,7 @@ bool IsInputSupported(const NodeArg& input, const std::string& parent_name, cons
     // WebNN doesn't support dynamic shape - use sessionOptions.freeDimensionOverrides to fix the shape.
     if (!dim.has_dim_value()) {
       LOGS(logger, VERBOSE) << "Dynamic shape is not supported, "
-                            << "use sessionOptions.FreeDimensionOverrides to set a fixed shape for input: "
-                            << input_name;
+                            << "use sessionOptions.freeDimensionOverrides to set a fixed shape: " << node_arg_name;
       return false;
     }
   }
@@ -104,7 +102,12 @@ std::vector<std::vector<NodeIndex>> GetSupportedNodes(const GraphViewer& graph_v
   std::vector<std::vector<NodeIndex>> supported_node_groups;
 
   for (const auto* input : graph_viewer.GetInputs()) {
-    if (!IsInputSupported(*input, "graph", logger)) {
+    if (!IsTensorShapeSupported(*input, "graph", logger)) {
+      return supported_node_groups;
+    }
+  }
+  for (const auto* output : graph_viewer.GetOutputs()) {
+    if (!IsTensorShapeSupported(*output, "graph", logger)) {
       return supported_node_groups;
     }
   }
@@ -226,6 +229,12 @@ bool GetBidirectionalBroadcastShape(std::vector<int64_t>& shape_a,
 
 bool SetWebnnDataType(emscripten::val& desc, const int32_t data_type) {
   switch (data_type) {
+    case ONNX_NAMESPACE::TensorProto_DataType_INT4:
+      desc.set("dataType", emscripten::val("int4"));
+      return true;
+    case ONNX_NAMESPACE::TensorProto_DataType_UINT4:
+      desc.set("dataType", emscripten::val("uint4"));
+      return true;
     case ONNX_NAMESPACE::TensorProto_DataType_BOOL:
     case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
       desc.set("dataType", emscripten::val("uint8"));
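The dynamic-shape rejection above points users at `sessionOptions.freeDimensionOverrides`, the onnxruntime-web option for pinning symbolic dimensions. For reference, a hedged sketch of the native C++ equivalent; the model path and dimension name are placeholders:

```cpp
#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env{ORT_LOGGING_LEVEL_WARNING, "webnn-shapes"};
  Ort::SessionOptions options;
  // Pin the symbolic dimension (assumed here to be named "batch_size") to a
  // concrete value so every tensor shape is static before the EP sees it.
  options.AddFreeDimensionOverrideByName("batch_size", 1);
  Ort::Session session{env, ORT_TSTR("model.onnx"), options};  // placeholder path
  return 0;
}
```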
diff --git a/onnxruntime/core/providers/webnn/builders/helper.h b/onnxruntime/core/providers/webnn/builders/helper.h
index ec9993bf138ba..aa3613551d8e1 100644
--- a/onnxruntime/core/providers/webnn/builders/helper.h
+++ b/onnxruntime/core/providers/webnn/builders/helper.h
@@ -180,7 +180,7 @@ inline bool IsEmptyTensor(const InitializedTensorSet& initializers, const std::s
   return std::any_of(dims.begin(), dims.end(), [](auto d) { return d == 0; });
 }
 
-bool IsInputSupported(const NodeArg& node_arg, const std::string& parent_name, const logging::Logger& logger);
+bool IsTensorShapeSupported(const NodeArg& node_arg, const std::string& parent_name, const logging::Logger& logger);
 
 // Get a list of groups of supported nodes, each group represents a subgraph supported by WebNN EP.
 std::vector<std::vector<NodeIndex>> GetSupportedNodes(const GraphViewer& graph_viewer,
@@ -303,6 +303,8 @@ inline bool GetWebNNOpType(const std::string& op_type, std::string& webnn_op_typ
 }
 
 static const InlinedHashMap<ONNX_NAMESPACE::TensorProto_DataType, std::string> onnx_to_webnn_data_type_map = {
+    {ONNX_NAMESPACE::TensorProto_DataType_INT4, "int4"},
+    {ONNX_NAMESPACE::TensorProto_DataType_UINT4, "uint4"},
     {ONNX_NAMESPACE::TensorProto_DataType_BOOL, "uint8"},
     {ONNX_NAMESPACE::TensorProto_DataType_INT8, "int8"},
    {ONNX_NAMESPACE::TensorProto_DataType_UINT8, "uint8"},
diff --git a/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.cc
index fffe964e6aaf2..1e641017f36b6 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.cc
@@ -34,7 +34,7 @@ bool BaseOpBuilder::IsOpSupported(const InitializedTensorSet& initializers, cons
   if (!HasSupportedInputs(node, wnn_limits, logger))
     return false;
 
-  if (!HasSupportedOutputsImpl(node, wnn_limits, logger))
+  if (!HasSupportedOutputs(node, wnn_limits, logger))
     return false;
 
   if (!HasSupportedOpSet(node, logger))
@@ -47,7 +47,7 @@ bool BaseOpBuilder::HasSupportedInputs(const Node& node, const emscripten::val&
                                        const logging::Logger& logger) const {
   const auto node_name = MakeString("Node [", node.Name(), "] type [", node.OpType(), "]");
   for (const auto* input : node.InputDefs()) {
-    if (!IsInputSupported(*input, node_name, logger)) {
+    if (!IsTensorShapeSupported(*input, node_name, logger)) {
      return false;
     }
   }
@@ -68,6 +68,18 @@ bool BaseOpBuilder::HasSupportedInputsImpl(const Node& node,
   return IsDataTypeSupportedByOp(op_type, input_type, wnn_limits, "input", "Input", logger);
 }
 
+bool BaseOpBuilder::HasSupportedOutputs(const Node& node, const emscripten::val& wnn_limits,
+                                        const logging::Logger& logger) const {
+  const auto node_name = MakeString("Node [", node.Name(), "] type [", node.OpType(), "]");
+  for (const auto* output : node.OutputDefs()) {
+    if (!IsTensorShapeSupported(*output, node_name, logger)) {
+      return false;
+    }
+  }
+
+  return HasSupportedOutputsImpl(node, wnn_limits, logger);
+}
+
 bool BaseOpBuilder::HasSupportedOutputsImpl(const Node& node,
                                             const emscripten::val& wnn_limits,
                                             const logging::Logger& logger) const {
diff --git a/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.h b/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.h
index 584455f62cb4e..a632876dab2b9 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.h
+++ b/onnxruntime/core/providers/webnn/builders/impl/base_op_builder.h
@@ -54,6 +54,7 @@ class BaseOpBuilder : public IOpBuilder {
  private:
   bool HasSupportedOpSet(const Node& node, const logging::Logger& logger) const;
   bool HasSupportedInputs(const Node& node, const emscripten::val& wnn_limits, const logging::Logger& logger) const;
+  bool HasSupportedOutputs(const Node& node, const emscripten::val& wnn_limits, const logging::Logger& logger) const;
 };
 
 }  // namespace webnn
diff --git a/onnxruntime/core/providers/webnn/builders/impl/cast_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/cast_op_builder.cc
index 3c4fc822f3d01..70ebe18c85b86 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/cast_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/cast_op_builder.cc
@@ -38,6 +38,12 @@ Status CastOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
   const auto to_type = helper.Get("to", ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
   std::string operand_type;
   switch (to_type) {
+    case ONNX_NAMESPACE::TensorProto_DataType_INT4:
+      operand_type = "int4";
+      break;
+    case ONNX_NAMESPACE::TensorProto_DataType_UINT4:
+      operand_type = "uint4";
+      break;
     case ONNX_NAMESPACE::TensorProto_DataType_BOOL:
     case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
       operand_type = "uint8";
diff --git a/onnxruntime/core/providers/webnn/builders/model.cc b/onnxruntime/core/providers/webnn/builders/model.cc
index fcfdb146bff34..231b65a4d1894 100644
--- a/onnxruntime/core/providers/webnn/builders/model.cc
+++ b/onnxruntime/core/providers/webnn/builders/model.cc
@@ -42,6 +42,8 @@ onnxruntime::common::Status Model::Compute(const InlinedHashMap<std::string, Onn
       case ONNX_NAMESPACE::TensorProto_DataType_BOOL:
+      case ONNX_NAMESPACE::TensorProto_DataType_INT4:
+      case ONNX_NAMESPACE::TensorProto_DataType_UINT4:
       case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
         view = emscripten::val{emscripten::typed_memory_view(num_elements,
                                                              static_cast<const uint8_t*>(tensor.buffer))};
@@ -93,6 +95,8 @@ onnxruntime::common::Status Model::Compute(const InlinedHashMap<std::string, Onn
       case ONNX_NAMESPACE::TensorProto_DataType_BOOL:
+      case ONNX_NAMESPACE::TensorProto_DataType_INT4:
+      case ONNX_NAMESPACE::TensorProto_DataType_UINT4:
       case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
         view = emscripten::val{emscripten::typed_memory_view(num_elements,
                                                              static_cast<uint8_t*>(tensor.buffer))};
@@ -210,6 +214,8 @@ void Model::AllocateInputOutputBuffers() {
     const auto data_type = input_info.data_type;
     switch (data_type) {
       case ONNX_NAMESPACE::TensorProto_DataType_BOOL:
+      case ONNX_NAMESPACE::TensorProto_DataType_INT4:
+      case ONNX_NAMESPACE::TensorProto_DataType_UINT4:
       case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
         wnn_inputs_.set(input, emscripten::val::global("Uint8Array").new_(num_elements));
         break;
@@ -245,6 +251,8 @@ void Model::AllocateInputOutputBuffers() {
     const auto data_type = output_info.data_type;
     switch (data_type) {
       case ONNX_NAMESPACE::TensorProto_DataType_BOOL:
+      case ONNX_NAMESPACE::TensorProto_DataType_INT4:
+      case ONNX_NAMESPACE::TensorProto_DataType_UINT4:
       case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
         wnn_outputs_.set(output, emscripten::val::global("Uint8Array").new_(num_elements));
         break;
diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.cc b/onnxruntime/core/providers/webnn/builders/model_builder.cc
index 8a7fea0cde431..84f8cc4b14665 100644
--- a/onnxruntime/core/providers/webnn/builders/model_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/model_builder.cc
@@ -137,8 +137,16 @@ Status ModelBuilder::RegisterInitializers() {
       ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(tensor, unpacked_tensor));
       tensor_ptr = reinterpret_cast<std::byte*>(unpacked_tensor.data());
     }
+    if (data_type == ONNX_NAMESPACE::TensorProto_DataType_INT4 ||
+        data_type == ONNX_NAMESPACE::TensorProto_DataType_UINT4) {
+      // For WebNN, int4 and uint4 tensors are stored in Uint8Array,
+      // so we need to adjust the number of elements.
+      num_elements = (static_cast<size_t>(num_elements) + 1) / 2;
+    }
     switch (data_type) {
       case ONNX_NAMESPACE::TensorProto_DataType_BOOL:
+      case ONNX_NAMESPACE::TensorProto_DataType_INT4:
+      case ONNX_NAMESPACE::TensorProto_DataType_UINT4:
       case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
         view = emscripten::val{emscripten::typed_memory_view(num_elements,
                                                              reinterpret_cast<uint8_t*>(tensor_ptr))};
@@ -214,7 +222,7 @@ Status ModelBuilder::RegisterModelInputOutput(const NodeArg& node_arg, bool is_i
   if (!shape.empty()) {
     dims.reserve(shape.size());
     for (const auto& dim : shape) {
-      // dim_param free dimensions should have already been excluded by IsInputSupported().
+      // dim_param free dimensions should have already been excluded by IsTensorShapeSupported().
       assert(dim.has_dim_value());
       dims.push_back(SafeInt<int32_t>(dim.dim_value()));
     }
   }
@@ -392,6 +400,8 @@ const emscripten::val& ModelBuilder::GetZeroConstant(const int32_t& data_type) {
   switch (data_type) {
     case ONNX_NAMESPACE::TensorProto_DataType_BOOL:
+    case ONNX_NAMESPACE::TensorProto_DataType_INT4:
+    case ONNX_NAMESPACE::TensorProto_DataType_UINT4:
     case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
       zero_buffer = emscripten::val::global("Uint8Array").new_(1);
       break;
diff --git a/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc b/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc
index 67fb35d26e6dc..559b521f18782 100644
--- a/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc
+++ b/onnxruntime/test/providers/internal_testing/internal_testing_tests.cc
@@ -159,7 +159,7 @@ TEST(InternalTestingEP, PreventSaveOfModelWithCompiledOps) {
 
 // the internal NHWC operators are only included as part of contrib ops currently. as the EP requests the NHWC
 // version of the ONNX operator when matching a static kernel, those are required.
-#if !defined(DISABLE_CONTRIB_OPS)
+#if !defined(DISABLE_CONTRIB_OPS) && !defined(USE_ROCM)
 
 TEST(InternalTestingEP, TestMixOfStaticAndCompiledKernels) {
   const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "transform/fusion/conv_relu_opset12.onnx";
@@ -256,10 +256,6 @@ TEST(InternalTestingEP, TestNhwcConversionOfStaticKernels) {
   run_test(ort_model_path);
 }
 
-// This test can be deprecated now as the code logic has been changed so the model is not applicable
-// TEST(InternalTestingEP, TestRegisterAllocatorHandlesUsageInMultipleSessions) {
-//}
-
 // make sure allocators returned by SessionState::GetAllocator are valid when IExecutionProvider::ReplaceAllocator
 // is used. if something is off InferenceSession::Initialize will fail.
 TEST(InternalTestingEP, TestReplaceAllocatorDoesntBreakDueToLocalAllocatorStorage) {
diff --git a/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml
index 1cf60b47b4ded..9e2d8e49a2292 100644
--- a/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml
@@ -37,9 +37,7 @@ variables:
   - name: render
     value: 109
   - name: RocmVersion
-    value: 6.1
-  - name: RocmVersionPatchSuffix
-    value: ".3"
+    value: 6.2.3
 
 jobs:
 - job: Linux_Build
@@ -66,7 +64,7 @@ jobs:
       parameters:
         Dockerfile: tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile
         Context: tools/ci_build/github/linux/docker
-        DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)$(RocmVersionPatchSuffix)"
+        DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)"
        Repository: onnxruntimetrainingmigraphx-cibuild-rocm$(RocmVersion)
 
     - task: Cache@2
@@ -165,7 +163,7 @@ jobs:
       parameters:
         Dockerfile: tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile
         Context: tools/ci_build/github/linux/docker
-        DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)$(RocmVersionPatchSuffix)"
+        DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)"
         Repository: onnxruntimetrainingmigraphx-cibuild-rocm$(RocmVersion)
 
     - task: CmdLine@2
diff --git a/tools/ci_build/github/azure-pipelines/linux-rocm-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-rocm-ci-pipeline.yml
index 50f3862761320..c730cc2548038 100644
--- a/tools/ci_build/github/azure-pipelines/linux-rocm-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-rocm-ci-pipeline.yml
@@ -37,9 +37,7 @@ variables:
   - name: render
     value: 109
   - name: RocmVersion
-    value: 6.1
-  - name: RocmVersionPatchSuffix
-    value: ".3"
+    value: 6.1.3
 
 jobs:
 - job: Linux_Build
@@ -66,7 +64,7 @@ jobs:
       parameters:
         Dockerfile: tools/ci_build/github/linux/docker/rocm-ci-pipeline-env.Dockerfile
         Context: tools/ci_build/github/linux/docker
-        DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)$(RocmVersionPatchSuffix)"
+        DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)"
         Repository: onnxruntimerocm-cibuild-rocm$(RocmVersion)
 
     - task: Cache@2
@@ -166,7 +164,7 @@ jobs:
       parameters:
         Dockerfile: tools/ci_build/github/linux/docker/rocm-ci-pipeline-env.Dockerfile
         Context: tools/ci_build/github/linux/docker
-        DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)$(RocmVersionPatchSuffix)"
+        DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)"
         Repository: onnxruntimerocm-cibuild-rocm$(RocmVersion)
 
     - task: CmdLine@2
@@ -231,7 +229,11 @@ jobs:
           -e KERNEL_EXPLORER_TEST_USE_CUPY=1 \
           -e CUPY_CACHE_DIR=/build/Release \
           onnxruntimerocm-cibuild-rocm$(RocmVersion) \
-          pytest /onnxruntime_src/onnxruntime/python/tools/kernel_explorer/ -n 4 --reruns 1 --durations=100
+          /bin/bash -c "
+            set -ex; \
+            python --version; \
+            ls /opt/miniconda/envs/rocm-ci/lib/; \
+            pytest /onnxruntime_src/onnxruntime/python/tools/kernel_explorer/ -n 4 --reruns 1 --durations=100"
       workingDirectory: $(Build.SourcesDirectory)
       displayName: 'Run kernel explorer tests'
       condition: succeededOrFailed()
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm
index f63f508852fc2..e4c3af05053ba 100644
--- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm
+++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm
@@ -6,7 +6,7 @@ ARG LD_LIBRARY_PATH_ARG=${DEVTOOLSET_ROOTPATH}/usr/lib64:${DEVTOOLSET_ROOTPATH}/
 ARG PREPEND_PATH=${DEVTOOLSET_ROOTPATH}/usr/bin:
 
 FROM $BASEIMAGE AS base_image
-ARG ROCM_VERSION=5.5
+ARG ROCM_VERSION=6.2.3
 
 #Add our own dependencies
 ADD scripts /tmp/scripts
diff --git a/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile b/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile
index 98ea5e119c319..51591e11ea2e9 100644
--- a/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile
+++ b/tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile
@@ -1,7 +1,7 @@
 # Refer to https://github.com/RadeonOpenCompute/ROCm-docker/blob/master/dev/Dockerfile-ubuntu-22.04-complete
 FROM ubuntu:22.04
 
-ARG ROCM_VERSION=6.0
+ARG ROCM_VERSION=6.2.3
 ARG AMDGPU_VERSION=${ROCM_VERSION}
 ARG APT_PREF='Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600'
 
@@ -68,7 +68,7 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
 # Create migraphx-ci environment
 ENV CONDA_ENVIRONMENT_PATH /opt/miniconda/envs/migraphx-ci
 ENV CONDA_DEFAULT_ENV migraphx-ci
-RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.9
+RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.10
 ENV PATH ${CONDA_ENVIRONMENT_PATH}/bin:${PATH}
 
 # Enable migraphx-ci environment
@@ -80,4 +80,4 @@ RUN ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_ENVIRONMENT_PATH}/bi
 
 # Install migraphx
 RUN apt update && apt install -y migraphx
-RUN pip install numpy packaging ml_dtypes==0.3.0
+RUN pip install numpy packaging ml_dtypes==0.5.0
diff --git a/tools/ci_build/github/linux/docker/rocm-ci-pipeline-env.Dockerfile b/tools/ci_build/github/linux/docker/rocm-ci-pipeline-env.Dockerfile
index 749e222aff499..f74c5c7b0295e 100644
--- a/tools/ci_build/github/linux/docker/rocm-ci-pipeline-env.Dockerfile
+++ b/tools/ci_build/github/linux/docker/rocm-ci-pipeline-env.Dockerfile
@@ -1,7 +1,7 @@
 # Refer to https://github.com/RadeonOpenCompute/ROCm-docker/blob/master/dev/Dockerfile-ubuntu-22.04-complete
 FROM ubuntu:22.04
 
-ARG ROCM_VERSION=6.0
+ARG ROCM_VERSION=6.1.3
 ARG AMDGPU_VERSION=${ROCM_VERSION}
 ARG APT_PREF='Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600'
 
@@ -67,26 +67,30 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
 # Create rocm-ci environment
 ENV CONDA_ENVIRONMENT_PATH /opt/miniconda/envs/rocm-ci
 ENV CONDA_DEFAULT_ENV rocm-ci
-RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.9
+RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.10
 ENV PATH ${CONDA_ENVIRONMENT_PATH}/bin:${PATH}
 
 # Enable rocm-ci environment
 SHELL ["conda", "run", "-n", "rocm-ci", "/bin/bash", "-c"]
 
-# ln -sf is needed to make sure that version `GLIBCXX_3.4.30' is found
+# Some shared libraries in the conda environment conflict with the ones installed in the Ubuntu system.
+# For example, the GCC version in the conda environment is 12.x, while the one in Ubuntu 22.04 is 11.x.
+# ln -sf to make sure we always use the system's libstdc++.so.6 and libgcc_s.so.1.
 RUN ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_ENVIRONMENT_PATH}/bin/../lib/libstdc++.so.6
+RUN ln -sf /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 ${CONDA_ENVIRONMENT_PATH}/bin/../lib/libgcc_s.so.1
 
 RUN pip install packaging \
-            ml_dtypes==0.3.0 \
+            ml_dtypes==0.5.0 \
             pytest==7.4.4 \
             pytest-xdist \
             pytest-rerunfailures \
-            scipy==1.10.0 \
-            numpy==1.24.1
+            scipy==1.14.1 \
+            numpy==1.26.4
 
 RUN apt install -y git
 
 # Install Cupy to decrease CPU utilization
+# Note: this version of CuPy requires numpy < 1.27
 RUN git clone https://github.com/ROCm/cupy && cd cupy && \
     git checkout 432a8683351d681e00903640489cb2f4055d2e09 && \
     export CUPY_INSTALL_USE_HIP=1 && \
diff --git a/tools/ci_build/github/linux/docker/scripts/setup_rocm_yum_repo.sh b/tools/ci_build/github/linux/docker/scripts/setup_rocm_yum_repo.sh
index 269337bbba042..0be64d96f3a34 100755
--- a/tools/ci_build/github/linux/docker/scripts/setup_rocm_yum_repo.sh
+++ b/tools/ci_build/github/linux/docker/scripts/setup_rocm_yum_repo.sh
@@ -2,7 +2,7 @@ set -e -x
 
 # version
-ROCM_VERSION=6.0
+ROCM_VERSION=6.2.3
 
 while getopts "r:" parameter_Option
 do case "${parameter_Option}"