From b54f753354dbab6ba868ab3d0f57f2c4c463661f Mon Sep 17 00:00:00 2001 From: Gorokhov Dmitriy Date: Thu, 22 Feb 2024 15:31:35 +0400 Subject: [PATCH] [CPU] Fixed BF16 Matmul inference precision (#22995) CPU plugin uses EnforceInferencePrecision routine for BF16 precision mark-up. Its logic assumes only activations precision is changed before Matmul op, while weights precision remains unchanged. Since dnnlFCTypeMapping misses the BF16 activation, FP32 weights optimized configuration for bf16, execution always happens in FP32 precision even if the user manually sets infer_precision=bf16. This bug is not visible on FP16 IRs (since the BF16+FP16 config is present), so only FP32 IRs are affected. Since save_model and ovc apply FP16 compression by default, the issue is mostly applicable for pipelines which use a model directly after a convert_model call. Cherry-picks: https://github.com/openvinotoolkit/openvino/pull/22994 --- .../src/nodes/executors/fullyconnected_implementations.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp index e13bab8501d5bf..b92b6ff16f9a1c 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp @@ -40,7 +40,7 @@ static const LayoutConfig dnnlFCLayoutConfig{LayoutType::ncsp, LayoutType::ncsp, // clang-format off static const TypeMapping dnnlFCTypeMapping { // {src, wei, bia, dst} pt - {{_bf16, _bf16, _any, _bf16 | _f32}, pt(bypass(), bypass(), use<3>(), use<3>())}, + {{_bf16, _bf16 | _f32, _any, _bf16 | _f32}, pt(bypass(), bypass(), use<3>(), use<3>())}, {{_f16, _f16, _any, _f16 | _f32}, pt(bypass(), bypass(), use<3>(), use<3>())}, // integer precision outputs are not supported for float precision inputs {{_f32 | _bf16 | _f16, _any, _any, _i8 | _u8}, pt(bypass(), bypass(), 
use<0>(), use<0>())}, @@ -63,7 +63,7 @@ static const MappingNotation dnnlConvolutionMappingNotation { static const TypeMapping dnnlConvolutionTypeMapping { // {src, wei, bia, dst} pt - {{_bf16, _bf16, _any, _bf16 | _f32}, pt(bypass(), bypass(), use<3>(), use<3>())}, + {{_bf16, _bf16 | _f32, _any, _bf16 | _f32}, pt(bypass(), bypass(), use<3>(), use<3>())}, {{_f16, _f16, _any, _f16 | _f32}, pt(bypass(), bypass(), use<3>(), use<3>())}, // integer precision outputs are not supported for float precision inputs {{_f32 | _bf16 | _f16, _any, _any, _i8 | _u8}, pt(bypass(), bypass(), use<0>(), use<0>())},