[WebNN EP] Remove some constraints for CPU backend (microsoft#20900)
The following constraints can be removed now that the WebNN TFLite backend supports:
- Concat: more than 4 inputs
- MatMul: broadcasting
- Resize: 'nearest' mode
- Split: more than 4 outputs
Honry authored Jun 6, 2024
1 parent da1f8f9 commit 52874f6
Showing 6 changed files with 20 additions and 91 deletions.
6 changes: 3 additions & 3 deletions js/web/docs/webnn-operators.md
@@ -50,7 +50,7 @@ operators and the supported opset domain/versions in **WebNN EP** by ONNX Runtime
| LessOrEqual | ai.onnx(12-15, 16+) | lesserOrEqual ||| |
| Log | ai.onnx(7-12, 13+) | log ||| |
| LpPool | ai.onnx(7-10, 11-17, 18+) | l2Pool2d ||| Only supports 4-D input, 2-D 'kernel_shape', 'p' value is 2 |
- | MatMul | ai.onnx(7-8, 9-12, 13+) | matmul ||| WebNN CPU doesn't support broadcasting for MatMul |
+ | MatMul | ai.onnx(7-8, 9-12, 13+) | matmul ||| |
| Max | ai.onnx(7, 8-11, 12, 13+) | max ||| |
| MaxPool | ai.onnx(7, 8-9, 10, 11, 12+) | maxPool2d ||| Only supports 4-D input, 2-D 'kernel_shape', 'storage_order' != 1, one output |
| Min | ai.onnx(7, 8-11, 12, 13+) | min ||| |
@@ -73,15 +73,15 @@ operators and the supported opset domain/versions in **WebNN EP** by ONNX Runtime
| ReduceSumSquare | ai.onnx(7-10, 11-12, 13-17, 18+) | reduceSumSquare ||| Input 'axes' if present should be a constant |
| Relu | ai.onnx(7-12, 13, 14+) | relu ||| |
| Reshape | ai.onnx(7-12, 13, 14-18, 19-20, 21+) | reshape ||| Input 'shape' should be a constant, 0 dimension value in 'shape' is not supported |
- | Resize | ai.onnx(11-12, 13-17, 18, 19+) | resample2d ||| Only supports 4-D input, exclude_outside != 0, input 'scales' and 'sizes' if present must be a constant, WebNN CPU backend only supports 'linear' mode, WebNN GPU backend only supports 'linear' and 'nearest' modes |
+ | Resize | ai.onnx(11-12, 13-17, 18, 19+) | resample2d ||| Only supports 4-D input, exclude_outside != 0, input 'scales' and 'sizes' if present must be a constant, 'linear' and 'nearest' modes |
| Shape | ai.onnx(7-12, 13-14, 15-18, 19-20, 21+) | slice ||| |
| Sigmoid | ai.onnx(7-12, 13+) | sigmoid ||| |
| Softplus | ai.onnx(7+) | softplus ||| |
| Softsign | ai.onnx(7+) | softsign ||| |
| Sin | ai.onnx(7+) | sin ||| |
| Slice | ai.onnx(7-9, 10, 11-12, 13+) | slice ||| Input 'starts', 'ends', 'axes', and 'steps' if present must be a constant, only supports 'steps' value 1 |
| Softmax | ai.onnx(7-10, 11-12, 13+) | softmax ||| Only supports input rank >= 2 |
- | Split | ai.onnx(7-10, 11-12, 13-17, 18+) | split ||| Input 'split' if present should be a constant, WebNN CPU backend only supports up to 4 outputs |
+ | Split | ai.onnx(7-10, 11-12, 13-17, 18+) | split ||| Input 'split' if present should be a constant |
| Sqrt | ai.onnx(7-12, 13+) | sqrt ||| |
| Squeeze | ai.onnx(7-10, 11-12, 13-20, 21+) | reshape ||| Input 'axes' if present should be a constant |
| Sub | ai.onnx(7-12, 13, 14+) | sub ||| |
@@ -36,40 +36,14 @@ Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
NodeAttrHelper helper(node);
uint32_t axis = static_cast<uint32_t>(HandleNegativeAxis(helper.Get("axis", 1), rank));

- const size_t num_inputs = input_defs.size();
std::vector<emscripten::val> inputs;
for (const auto* input : input_defs) {
LOGS(logger, VERBOSE) << "input name " << input->Name();
inputs.push_back(model_builder.GetOperand(input->Name()));
}

- emscripten::val output = emscripten::val::undefined();
- if (num_inputs <= 4 || model_builder.GetPreferredLayout() == DataLayout::NCHW) {
- output = model_builder.GetBuilder().call<emscripten::val>("concat", emscripten::val::array(inputs), axis);
- } else {
- // WebNN XNNPack backend only supports the concat with inputs number <= 4,
- // decomposing the Concat with inputs number > 4 into multiple WebNN concat ops.
- size_t remaining_inputs = num_inputs;
- size_t max_inputs = 4;
- while (remaining_inputs > 0) {
- std::vector<emscripten::val> chunk_inputs;
-
- // Push the last concated output to the next chunk_inputs.
- if (output != emscripten::val::undefined()) {
- chunk_inputs.push_back(output);
- max_inputs = 3;
- }
-
- size_t chunk_size = std::min(remaining_inputs, max_inputs);
-
- for (size_t i = 0; i < chunk_size; i++) {
- chunk_inputs.push_back(inputs[num_inputs - remaining_inputs + i]);
- }
-
- output = model_builder.GetBuilder().call<emscripten::val>("concat", emscripten::val::array(chunk_inputs), axis);
- remaining_inputs -= chunk_size;
- }
- }
+ emscripten::val output =
+ model_builder.GetBuilder().call<emscripten::val>("concat", emscripten::val::array(inputs), axis);

model_builder.AddOperand(node.OutputDefs()[0]->Name(), std::move(output));
return Status::OK();
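Note: for reference, the removed workaround decomposed a Concat with more than 4 inputs into a chain of WebNN concat ops, each taking at most 4 operands. A minimal standalone sketch of that chunking scheme (illustrative only, not part of this commit; the helper name is hypothetical):

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Illustrative sketch, not part of the commit. Computes how many fresh inputs
// each chained concat consumes: the first call may take 4 fresh inputs; every
// later call carries the previous intermediate output in one slot, leaving
// room for at most 3 fresh inputs.
std::vector<size_t> ConcatChunkSizes(size_t num_inputs) {
  std::vector<size_t> chunks;
  size_t remaining = num_inputs;
  size_t max_inputs = 4;
  while (remaining > 0) {
    const size_t chunk = std::min(remaining, max_inputs);
    chunks.push_back(chunk);
    remaining -= chunk;
    max_inputs = 3;  // Reserve a slot for the carried output from here on.
  }
  return chunks;
}

// Example: ConcatChunkSizes(10) yields {4, 3, 3}, i.e. three chained concats.
```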
41 changes: 6 additions & 35 deletions onnxruntime/core/providers/webnn/builders/impl/gemm_op_builder.cc
@@ -23,7 +23,7 @@ class GemmOpBuilder : public BaseOpBuilder {

// Operator support related.
private:
- bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
+ bool IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const Node& node,
const WebnnDeviceType /* device_type */, const logging::Logger& logger) const override;
bool HasSupportedInputsImpl(const Node& node, const WebnnDeviceType /* device_type */,
const logging::Logger& logger) const override;
@@ -64,13 +64,9 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
b = model_builder.GetBuilder().call<emscripten::val>("reshape", b,
emscripten::val::array(GetVecUint32FromVecInt64(b_shape)));
}
- // The inputs of MatMul must be at least 3D for WebNN CPU backend. Use GEMM for 2D case.
- // TODO: Remove this workaround when it is fixed in Chromium.
- if (model_builder.GetWebnnDeviceType() == WebnnDeviceType::CPU && a_shape.size() == 2) {
- output = model_builder.GetBuilder().call<emscripten::val>("gemm", a, b);
- } else {
- output = model_builder.GetBuilder().call<emscripten::val>("matmul", a, b);
- }

+ output = model_builder.GetBuilder().call<emscripten::val>("matmul", a, b);

// If the inputs are both 1D, reduce the output to a scalar.
if (extended_a_shape && extended_b_shape) {
output = model_builder.GetBuilder().call<emscripten::val>("reshape", output, emscripten::val::array());
@@ -132,11 +128,10 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,

// Operator support related.

- bool GemmOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
+ bool GemmOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& /* initializers */,
const Node& node,
- const WebnnDeviceType device_type,
+ const WebnnDeviceType /* device_type */,
const logging::Logger& logger) const {
- (void)initializers;
const auto& op_type = node.OpType();
const auto& input_defs(node.InputDefs());
const size_t a_idx = 0, b_idx = 1, c_idx = 2; // A*B+C
@@ -194,30 +189,6 @@ bool GemmOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
}
}

- if (op_type == "MatMul") {
- // If the first argument is 1-D, it is promoted to a matrix by prepending a 1 to its dimensions.
- // If the second argument is 1-D, it is promoted to a matrix by appending a 1 to its dimensions.
- if (a_shape.size() == 1) a_shape.insert(a_shape.begin(), 1);
- if (b_shape.size() == 1) b_shape.push_back(1);
-
- // WebNN CPU backend has two more constraints.
- // https://source.chromium.org/chromium/chromium/src/+/main:third_party/blink/renderer/modules/ml/webnn/ml_graph_xnnpack.cc;l=1177
- // TODO: Remove this workaround when Chromium enables broadcast for MatMul on WebNN CPU backend.
- if (device_type == WebnnDeviceType::CPU) {
- if (a_shape.size() != b_shape.size()) {
- LOGS(logger, VERBOSE) << "The rank of two inputs for WebNN CPU backend MatMul must be the same.";
- return false;
- }
-
- for (size_t i = 0; i < a_shape.size() - 2; i++) {
- if (a_shape[i] != b_shape[i]) {
- LOGS(logger, VERBOSE) << "WebNN CPU backend can't support broadcasting for MatMul.";
- return false;
- }
- }
- }
- }

return true;
}

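Note: the removed CPU-only constraint amounted to requiring identical ranks and identical batch dimensions for the two MatMul inputs (i.e. no broadcasting), after applying the ONNX 1-D promotion rules kept in the builder above. A standalone sketch of that check (illustrative only, not part of this commit; the function name is hypothetical):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative sketch, not part of the commit. Returns true when the batch
// dims (all but the last two) match exactly, which is what the removed
// WebNN CPU constraint demanded.
bool BatchDimsMatchExactly(std::vector<int64_t> a_shape,
                           std::vector<int64_t> b_shape) {
  // ONNX MatMul 1-D promotion: prepend 1 to A's dims, append 1 to B's dims.
  if (a_shape.size() == 1) a_shape.insert(a_shape.begin(), 1);
  if (b_shape.size() == 1) b_shape.push_back(1);
  if (a_shape.size() != b_shape.size()) return false;  // Ranks must match.
  for (size_t i = 0; i + 2 < a_shape.size(); ++i) {
    if (a_shape[i] != b_shape[i]) return false;  // No batch broadcasting.
  }
  return true;
}
```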
@@ -30,7 +30,7 @@ class ResizeOpBuilder : public BaseOpBuilder {
// Operator support related.
private:
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
- const WebnnDeviceType device_type, const logging::Logger& logger) const override;
+ const WebnnDeviceType /* device_type */, const logging::Logger& logger) const override;

// Resize opset 10- is very different than Resize opset 11+, with many key attributes missing.
// We only support Resize opset 11+ here.
@@ -164,7 +164,7 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,

bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
const Node& node,
- const WebnnDeviceType device_type,
+ const WebnnDeviceType /* device_type */,
const logging::Logger& logger) const {
const auto& input_defs = node.InputDefs();

@@ -184,18 +184,10 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
const auto mode = helper.Get("mode", "nearest");
bool is_linear_resize = mode == "linear";
bool is_nearest_resize = mode == "nearest";
- // WebNN CPU backend only supports "linear" mode.
- // WebNN GPU backend only supports "linear" and "nearest" modes.
- if (device_type == WebnnDeviceType::CPU) {
- if (!is_linear_resize) {
- LOGS(logger, VERBOSE) << "Resize unsupported input mode, " << mode << " for CPU backend.";
- return false;
- }
- } else {
- if (!is_linear_resize && !is_nearest_resize) {
- LOGS(logger, VERBOSE) << "Resize unsupported input mode, " << mode << " for GPU backend.";
- return false;
- }
+ // WebNN only supports "linear" and "nearest" modes.
+ if (!is_linear_resize && !is_nearest_resize) {
+ LOGS(logger, VERBOSE) << "Resize does not support input mode: " << mode;
+ return false;
}

const auto exclude_outside = helper.Get("exclude_outside", 0);
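Note: the surviving check admits exactly the two interpolation modes WebNN's resample2d defines ("nearest-neighbor" and "linear" in the WebNN spec). A hedged sketch of the ONNX-to-WebNN mode mapping this implies (simplified; the helper name is hypothetical):

```cpp
#include <string>

// Illustrative sketch, not part of the commit. Maps the ONNX Resize 'mode'
// attribute to a WebNN resample2d mode string, or nullptr when the mode
// (e.g. "cubic") has no resample2d equivalent.
const char* MapResizeModeToWebnn(const std::string& onnx_mode) {
  if (onnx_mode == "linear") return "linear";
  if (onnx_mode == "nearest") return "nearest-neighbor";
  return nullptr;
}
```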
12 changes: 2 additions & 10 deletions onnxruntime/core/providers/webnn/builders/impl/split_op_builder.cc
@@ -27,7 +27,7 @@ class SplitOpBuilder : public BaseOpBuilder {
// Operator support related.
private:
bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
- const WebnnDeviceType device_type, const logging::Logger& logger) const override;
+ const WebnnDeviceType /* device_type */, const logging::Logger& logger) const override;
};

// Add operator related.
@@ -94,7 +94,7 @@ Status SplitOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,

bool SplitOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
const Node& node,
- const WebnnDeviceType device_type,
+ const WebnnDeviceType /* device_type */,
const logging::Logger& logger) const {
const auto& input_defs = node.InputDefs();
std::vector<int64_t> input_shape;
@@ -126,10 +126,6 @@ bool SplitOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
LOGS(logger, VERBOSE) << "Cannot get split.";
return false;
}
- if (split.size() > 4 && device_type == WebnnDeviceType::CPU) {
- LOGS(logger, VERBOSE) << "WebNN CPU backend only supports up to 4 outputs.";
- return false;
- }
} else {
if (helper.HasAttr("num_outputs")) {
// Split has 'num_outputs' attribute when opset is 18.
@@ -138,10 +134,6 @@ bool SplitOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers,
LOGS(logger, VERBOSE) << "The 'num_outputs' must be a positive integer.";
return false;
}
- if (num_outputs > 4 && device_type == WebnnDeviceType::CPU) {
- LOGS(logger, VERBOSE) << "WebNN CPU backend only supports up to 4 outputs.";
- return false;
- }
} else {
const auto opset = node.SinceVersion();
if (opset >= 18) {
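Note: for context on the 'num_outputs' path above, ONNX Split-18 divides the axis into equal chunks of ceil(dim / num_outputs), with the last chunk smaller when the dimension is not evenly divisible. A minimal sketch of that size computation (illustrative only, not part of this commit; the helper name is hypothetical):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Illustrative sketch, not part of the commit. Computes per-output sizes
// along the split axis for the ONNX Split-18 'num_outputs' attribute.
std::vector<int64_t> SplitSizesFromNumOutputs(int64_t dim, int64_t num_outputs) {
  std::vector<int64_t> sizes;
  const int64_t chunk = (dim + num_outputs - 1) / num_outputs;  // ceil division
  int64_t remaining = dim;
  for (int64_t i = 0; i < num_outputs; ++i) {
    sizes.push_back(std::min(chunk, remaining));
    remaining -= sizes.back();
  }
  return sizes;
}

// Example: SplitSizesFromNumOutputs(10, 4) yields {3, 3, 3, 1}.
```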
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/webnn/builders/model_builder.h
@@ -53,7 +53,7 @@ class ModelBuilder {
void AddInitializerToSkip(const std::string& tensor_name);

// There are some input which will not be used, add it to a list which will not
- // be added to CoreML model, since CoreML does not like input unused.
+ // be added to WebNN model, since WebNN does not like input unused.
void AddInputToSkip(const std::string& input_name);

std::string GetUniqueName(const std::string& base_name);
