[WebNN EP] Remove some constraints for CPU backend (microsoft#20900)
The following constraints can be removed now that the WebNN TFLite backend supports:
- Concat: more than 4 inputs
- MatMul: broadcasting
- Resize: 'nearest' mode
- Split: more than 4 outputs
Honry authored Jun 6, 2024
1 parent da1f8f9 commit 52874f6
Showing 6 changed files with 20 additions and 91 deletions.
6 changes: 3 additions & 3 deletions js/web/docs/webnn-operators.md
@@ -50,7 +50,7 @@ operators and the supported opset domain/versions in **WebNN EP** by ONNX Runtime
| LessOrEqual | ai.onnx(12-15, 16+) | lesserOrEqual ||| |
| Log | ai.onnx(7-12, 13+) | log ||| |
| LpPool | ai.onnx(7-10, 11-17, 18+) | l2Pool2d ||| Only supports 4-D input, 2-D 'kernel_shape', 'p' value is 2 |
- | MatMul | ai.onnx(7-8, 9-12, 13+) | matmul ||| WebNN CPU doesn't support broadcasting for MatMul |
+ | MatMul | ai.onnx(7-8, 9-12, 13+) | matmul ||| |
| Max | ai.onnx(7, 8-11, 12, 13+) | max ||| |
| MaxPool | ai.onnx(7, 8-9, 10, 11, 12+) | maxPool2d ||| Only supports 4-D input, 2-D 'kernel_shape', 'storage_order' != 1, one output |
| Min | ai.onnx(7, 8-11, 12, 13+) | min ||| |
@@ -73,15 +73,15 @@ operators and the supported opset domain/versions in **WebNN EP** by ONNX Runtime
| ReduceSumSquare | ai.onnx(7-10, 11-12, 13-17, 18+) | reduceSumSquare ||| Input 'axes' if present should be a constant |
| Relu | ai.onnx(7-12, 13, 14+) | relu ||| |
| Reshape | ai.onnx(7-12, 13, 14-18, 19-20, 21+) | reshape ||| Input 'shape' should be a constant, 0 dimension value in 'shape' is not supported |
- | Resize | ai.onnx(11-12, 13-17, 18, 19+) | resample2d ||| Only supports 4-D input, exclude_outside != 0, input 'scales' and 'sizes' if present must be a constant, WebNN CPU backend only supports 'linear' mode, WebNN GPU backend only supports 'linear' and 'nearest' modes |
+ | Resize | ai.onnx(11-12, 13-17, 18, 19+) | resample2d ||| Only supports 4-D input, exclude_outside != 0, input 'scales' and 'sizes' if present must be a constant, 'linear' and 'nearest' modes |
| Shape | ai.onnx(7-12, 13-14, 15-18, 19-20, 21+) | slice ||| |
| Sigmoid | ai.onnx(7-12, 13+) | sigmoid ||| |
| Softplus | ai.onnx(7+) | softplus ||| |
| Softsign | ai.onnx(7+) | softsign ||| |
| Sin | ai.onnx(7+) | sin ||| |
| Slice | ai.onnx(7-9, 10, 11-12, 13+) | slice ||| Input 'starts', 'ends', 'axes', and 'steps' if present must be a constant, only supports 'steps' value 1 |
| Softmax | ai.onnx(7-10, 11-12, 13+) | softmax ||| Only supports input rank >= 2 |
- | Split | ai.onnx(7-10, 11-12, 13-17, 18+) | split ||| Input 'split' if present should be a constant, WebNN CPU backend only supports up to 4 outputs |
+ | Split | ai.onnx(7-10, 11-12, 13-17, 18+) | split ||| Input 'split' if present should be a constant |
| Sqrt | ai.onnx(7-12, 13+) | sqrt ||| |
| Squeeze | ai.onnx(7-10, 11-12, 13-20, 21+) | reshape ||| Input 'axes' if present should be a constant |
| Sub | ai.onnx(7-12, 13, 14+) | sub ||| |
@@ -36,40 +36,14 @@ Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
NodeAttrHelper helper(node);
uint32_t axis = static_cast<uint32_t>(HandleNegativeAxis(helper.Get("axis", 1), rank));

- const size_t num_inputs = input_defs.size();
std::vector<emscripten::val> inputs;
for (const auto* input : input_defs) {
LOGS(logger, VERBOSE) << "input name " << input->Name();
inputs.push_back(model_builder.GetOperand(input->Name()));
}

- emscripten::val output = emscripten::val::undefined();
- if (num_inputs <= 4 || model_builder.GetPreferredLayout() == DataLayout::NCHW) {
- output = model_builder.GetBuilder().call<emscripten::val>("concat", emscripten::val::array(inputs), axis);
- } else {
- // WebNN XNNPack backend only supports the concat with inputs number <= 4,
- // decomposing the Concat with inputs number > 4 into multiple WebNN concat ops.
- size_t remaining_inputs = num_inputs;
- size_t max_inputs = 4;
- while (remaining_inputs > 0) {
- std::vector<emscripten::val> chunk_inputs;
-
- // Push the last concated output to the next chunk_inputs.
- if (output != emscripten::val::undefined()) {
- chunk_inputs.push_back(output);
- max_inputs = 3;
- }
-
- size_t chunk_size = std::min(remaining_inputs, max_inputs);
-
- for (size_t i = 0; i < chunk_size; i++) {
- chunk_inputs.push_back(inputs[num_inputs - remaining_inputs + i]);
- }
-
- output = model_builder.GetBuilder().call<emscripten::val>("concat", emscripten::val::array(chunk_inputs), axis);
- remaining_inputs -= chunk_size;
- }
- }
+ emscripten::val output =
+ model_builder.GetBuilder().call<emscripten::val>("concat", emscripten::val::array(inputs), axis);

model_builder.AddOperand(node.OutputDefs()[0]->Name(), std::move(output));
return Status::OK();
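Note: for reference, the removed workaround decomposed a Concat with more than 4 inputs into a chain of WebNN concat ops, each taking at most 4 operands. A minimal standalone sketch of that chunking scheme (illustrative only, not part of this commit; the helper name is hypothetical):

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Illustrative sketch, not part of the commit. Computes how many fresh inputs
// each chained concat consumes: the first call may take 4 fresh inputs; every
// later call carries the previous intermediate output in one slot, leaving
// room for at most 3 fresh inputs.
std::vector<size_t> ConcatChunkSizes(size_t num_inputs) {
  std::vector<size_t> chunks;
  size_t remaining = num_inputs;
  size_t max_inputs = 4;
  while (remaining > 0) {
    const size_t chunk = std::min(remaining, max_inputs);
    chunks.push_back(chunk);
    remaining -= chunk;
    max_inputs = 3;  // Reserve a slot for the carried output from here on.
  }
  return chunks;
}

// Example: ConcatChunkSizes(10) yields {4, 3, 3}, i.e. three chained concats.
```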
41 changes: 6 additions & 35 deletions onnxruntime/core/providers/webnn/builders/impl/gemm_op_builder.cc
@@ -23,7 +23,7 @@ class GemmOpBuilder : public BaseOpBuilder {

// Operator support related.
private:
- bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
+ bool IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const Node& node,
const WebnnDeviceType /* device_type */, const logging::Logger& logger) const override;
bool HasSupportedInputsImpl(const Node& node, const WebnnDeviceType /* device_type */,
const logging::Logger& logger) const override;
@@ -64,13 +64,9 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
b = model_builder.GetBuilder().call<emscripten::val>("reshape", b,
emscripten::val::array(GetVecUint32FromVecInt64(b_shape)));
}
- // The inputs of MatMul must be at least 3D for WebNN CPU backend. Use GEMM for 2D case.
- // TODO: Remove this workaround when it is fixed in Chromium.
- if (model_builder.GetWebnnDeviceType() == WebnnDeviceType::CPU && a_shape.size() == 2) {
- output = model_builder.GetBuilder().call<emscripten::val>("gemm", a, b);
- } else {
- output = model_builder.GetBuilder().call<emscripten::val>("matmul", a, b);
- }

+ output = model_builder.GetBuilder().call<emscripten::val>("matmul", a, b);

// If the inputs are both 1D, reduce the output to a scalar.
if (extended_a_shape && extended_b_shape) {
output = model_builder.GetBuilder().call<emscripten::val>("reshape", output, emscripten::val::array());
@@ -132,11 +128,10 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,

// Operator support related.

- bool GemmOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
+ bool GemmOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& /* initializers */,
const Node& node,
- const WebnnDeviceType device_type,
+ const WebnnDeviceType /* device_type */,
const logging::Logger& logger) const {
- (void)initializers;
const auto& op_type = node.OpType();
const auto& input_defs(node.InputDefs());
const size_t a_idx = 0, b_idx = 1, c_idx = 2; // A*B+C
@@ -194,30 +189,6 @@ bool GemmOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
}
}

- if (op_type == "MatMul") {
- // If the first argument is 1-D, it is promoted to a matrix by prepending a 1 to its dimensions.
- // If the second argument is 1-D, it is promoted to a matrix by appending a 1 to its dimensions.
- if (a_shape.size() == 1) a_shape.insert(a_shape.begin(), 1);
- if (b_shape.size() == 1) b_shape.push_back(1);
-
- // WebNN CPU backend has two more constraints.
- // https://source.chromium.org/chromium/chromium/src/+/main:third_party/blink/renderer/modules/ml/webnn/ml_graph_xnnpack.cc;l=1177
- // TODO: Remove this workaround when Chromium enables broadcast for MatMul on WebNN CPU backend.
- if (device_type == WebnnDeviceType::CPU) {
- if (a_shape.size() != b_shape.size()) {
- LOGS(logger, VERBOSE) << "The rank of two inputs for WebNN CPU backend MatMul must be the same.";
- return false;
- }
-
- for (size_t i = 0; i < a_shape.size() - 2; i++) {
- if (a_shape[i] != b_shape[i]) {
- LOGS(logger, VERBOSE) << "WebNN CPU backend can't support broadcasting for MatMul.";
- return false;
- }
- }
- }
- }

return true;
}

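Note: the removed CPU-only constraint amounted to requiring identical ranks and identical batch dimensions for the two MatMul inputs (i.e. no broadcasting), after applying the ONNX 1-D promotion rules kept in the builder above. A standalone sketch of that check (illustrative only, not part of this commit; the function name is hypothetical):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative sketch, not part of the commit. Returns true when the batch
// dims (all but the last two) match exactly, which is what the removed
// WebNN CPU constraint demanded.
bool BatchDimsMatchExactly(std::vector<int64_t> a_shape,
                           std::vector<int64_t> b_shape) {
  // ONNX MatMul 1-D promotion: prepend 1 to A's dims, append 1 to B's dims.
  if (a_shape.size() == 1) a_shape.insert(a_shape.begin(), 1);
  if (b_shape.size() == 1) b_shape.push_back(1);
  if (a_shape.size() != b_shape.size()) return false;  // Ranks must match.
  for (size_t i = 0; i + 2 < a_shape.size(); ++i) {
    if (a_shape[i] != b_shape[i]) return false;  // No batch broadcasting.
  }
  return true;
}
```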
@@ -30,7 +30,7 @@ class ResizeOpBuilder : public BaseOpBuilder {
// Operator support related.
private:
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
- const WebnnDeviceType device_type, const logging::Logger& logger) const override;
+ const WebnnDeviceType /* device_type */, const logging::Logger& logger) const override;

// Resize opset 10- is very different than Resize opset 11+, with many key attributes missing.
// We only support Resize opset 11+ here.
@@ -164,7 +164,7 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,

bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
const Node& node,
- const WebnnDeviceType device_type,
+ const WebnnDeviceType /* device_type */,
const logging::Logger& logger) const {
const auto& input_defs = node.InputDefs();

@@ -184,18 +184,10 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
const auto mode = helper.Get("mode", "nearest");
bool is_linear_resize = mode == "linear";
bool is_nearest_resize = mode == "nearest";
- // WebNN CPU backend only supports "linear" mode.
- // WebNN GPU backend only supports "linear" and "nearest" modes.
- if (device_type == WebnnDeviceType::CPU) {
- if (!is_linear_resize) {
- LOGS(logger, VERBOSE) << "Resize unsupported input mode, " << mode << " for CPU backend.";
- return false;
- }
- } else {
- if (!is_linear_resize && !is_nearest_resize) {
- LOGS(logger, VERBOSE) << "Resize unsupported input mode, " << mode << " for GPU backend.";
- return false;
- }
+ // WebNN only supports "linear" and "nearest" modes.
+ if (!is_linear_resize && !is_nearest_resize) {
+ LOGS(logger, VERBOSE) << "Resize does not support input mode: " << mode;
+ return false;
}

const auto exclude_outside = helper.Get("exclude_outside", 0);
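Note: the surviving check admits exactly the two interpolation modes WebNN's resample2d defines ("nearest-neighbor" and "linear" in the WebNN spec). A hedged sketch of the ONNX-to-WebNN mode mapping this implies (simplified; the helper name is hypothetical):

```cpp
#include <string>

// Illustrative sketch, not part of the commit. Maps the ONNX Resize 'mode'
// attribute to a WebNN resample2d mode string, or nullptr when the mode
// (e.g. "cubic") has no resample2d equivalent.
const char* MapResizeModeToWebnn(const std::string& onnx_mode) {
  if (onnx_mode == "linear") return "linear";
  if (onnx_mode == "nearest") return "nearest-neighbor";
  return nullptr;
}
```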
12 changes: 2 additions & 10 deletions onnxruntime/core/providers/webnn/builders/impl/split_op_builder.cc
@@ -27,7 +27,7 @@ class SplitOpBuilder : public BaseOpBuilder {
// Operator support related.
private:
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
- const WebnnDeviceType device_type, const logging::Logger& logger) const override;
+ const WebnnDeviceType /* device_type */, const logging::Logger& logger) const override;
};

// Add operator related.
@@ -94,7 +94,7 @@ Status SplitOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,

bool SplitOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
const Node& node,
- const WebnnDeviceType device_type,
+ const WebnnDeviceType /* device_type */,
const logging::Logger& logger) const {
const auto& input_defs = node.InputDefs();
std::vector<int64_t> input_shape;
@@ -126,10 +126,6 @@ bool SplitOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
LOGS(logger, VERBOSE) << "Cannot get split.";
return false;
}
- if (split.size() > 4 && device_type == WebnnDeviceType::CPU) {
- LOGS(logger, VERBOSE) << "WebNN CPU backend only supports up to 4 outputs.";
- return false;
- }
} else {
if (helper.HasAttr("num_outputs")) {
// Split has 'num_outputs' attribute when opset is 18.
@@ -138,10 +134,6 @@ bool SplitOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
LOGS(logger, VERBOSE) << "The 'num_outputs' must be a positive integer.";
return false;
}
- if (num_outputs > 4 && device_type == WebnnDeviceType::CPU) {
- LOGS(logger, VERBOSE) << "WebNN CPU backend only supports up to 4 outputs.";
- return false;
- }
} else {
const auto opset = node.SinceVersion();
if (opset >= 18) {
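Note: for context on the 'num_outputs' path above, ONNX Split-18 divides the axis into equal chunks of ceil(dim / num_outputs), with the last chunk smaller when the dimension is not evenly divisible. A minimal sketch of that size computation (illustrative only, not part of this commit; the helper name is hypothetical):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Illustrative sketch, not part of the commit. Computes per-output sizes
// along the split axis for the ONNX Split-18 'num_outputs' attribute.
std::vector<int64_t> SplitSizesFromNumOutputs(int64_t dim, int64_t num_outputs) {
  std::vector<int64_t> sizes;
  const int64_t chunk = (dim + num_outputs - 1) / num_outputs;  // ceil division
  int64_t remaining = dim;
  for (int64_t i = 0; i < num_outputs; ++i) {
    sizes.push_back(std::min(chunk, remaining));
    remaining -= sizes.back();
  }
  return sizes;
}

// Example: SplitSizesFromNumOutputs(10, 4) yields {3, 3, 3, 1}.
```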
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/webnn/builders/model_builder.h
@@ -53,7 +53,7 @@ class ModelBuilder {
void AddInitializerToSkip(const std::string& tensor_name);

// There are some input which will not be used, add it to a list which will not
- // be added to CoreML model, since CoreML does not like input unused.
+ // be added to WebNN model, since WebNN does not like input unused.
void AddInputToSkip(const std::string& input_name);

std::string GetUniqueName(const std::string& base_name);
