[CPU] Enabled FP16 Compressed FC on models with PagedAttention

dmitry-gorokhov · Oct 11, 2024 · 438403b
1 parent 79e229b
commit 438403b
Showing 1 changed file with 4 additions and 3 deletions.
diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp
@@ -18,6 +18,7 @@
 #include "utils/denormals.hpp"
 #include "utils/precision_support.h"
 #include "weights_cache.hpp"
+#include "openvino/op/paged_attention.hpp"
 
 #if defined(__linux__)
 #    include <signal.h>
@@ -196,9 +197,9 @@ static Config::ModelType getModelType(const std::shared_ptr<const Model>& model)
     if (op::util::has_op_with_type<op::v1::Convolution>(model) ||
         op::util::has_op_with_type<op::v1::ConvolutionBackpropData>(model))
         return Config::ModelType::CNN;
-    
-    if (op::util::has_op_with_type<op::v13::ScaledDotProductAttention>(model) &&
-        model->get_variables().size() > 0)
+
+    if ((op::util::has_op_with_type<op::v13::ScaledDotProductAttention>(model) && model->get_variables().size() > 0) ||
+         op::util::has_op_with_type<ov::op::PagedAttentionExtension>(model))
         return Config::ModelType::LLM;
 
     return Config::ModelType::Unknown;