From a488113062b7231197ace8522ab3cab535c77d0b Mon Sep 17 00:00:00 2001
From: Shangdi Yu
Date: Wed, 31 Jul 2024 23:28:24 +0000
Subject: [PATCH] [AOTI] Fix bfloat16 in CPU (#132150)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #122986

- Add "typedef at::BFloat16 bfloat16;" to the header of the generated cpp file
- Suppress warning: comparison of integer expressions of different signedness: ‘long unsigned int’ and ‘int64_t’ {aka ‘long int’} [-Wsign-compare]
  436 |   if (tensor.numel() != numel) {

Pull Request resolved: https://github.com/pytorch/pytorch/pull/132150
Approved by: https://github.com/chenyang78, https://github.com/desertfire
---
 test/inductor/test_aot_inductor.py                      | 5 -----
 torch/_inductor/codegen/aoti_runtime/implementation.cpp | 2 +-
 torch/_inductor/codegen/cpp_wrapper_cpu.py              | 2 ++
 torch/_inductor/codegen/cpp_wrapper_cuda.py             | 2 --
 4 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/test/inductor/test_aot_inductor.py b/test/inductor/test_aot_inductor.py
index d65e9f2f25595..95829968c6572 100644
--- a/test/inductor/test_aot_inductor.py
+++ b/test/inductor/test_aot_inductor.py
@@ -3252,11 +3252,6 @@ def fail_non_abi_compatible_cuda(is_skip=False):
     "test_index_put_with_none_index": fail_minimal_arrayref_interface(is_skip=True),
     # FIXME: failed with Segfault while exiting the Python runtime
     "test_constant": fail_stack_allocation(is_skip=True),
-    # C++ compile error, need for aoti_torch___scaled_dot_product_flash_attention_for_cpu
-    # https://github.com/pytorch/pytorch/issues/122986
-    "test_sdpa": fail_with_and_without_stack_allocation(is_skip=True),
-    # The same issue as https://github.com/pytorch/pytorch/issues/122986
-    "test_sdpa_2": fail_with_and_without_stack_allocation(is_skip=True),
     # Looks like the same issue as https://github.com/pytorch/pytorch/issues/122978
     "test_shifted_constraint_ranges": fail_with_and_without_stack_allocation(
         is_skip=True
diff --git a/torch/_inductor/codegen/aoti_runtime/implementation.cpp b/torch/_inductor/codegen/aoti_runtime/implementation.cpp
index 4869825cadabd..0273aa9aa8df0 100644
--- a/torch/_inductor/codegen/aoti_runtime/implementation.cpp
+++ b/torch/_inductor/codegen/aoti_runtime/implementation.cpp
@@ -76,7 +76,7 @@ void convert_handles_to_inputs(
 }
 
 template <typename T>
-void assert_numel(const ArrayRefTensor<T>& tensor, int64_t numel) {
+void assert_numel(const ArrayRefTensor<T>& tensor, uint64_t numel) {
   if (tensor.numel() != numel) {
     std::stringstream err;
     err << "incorrect numel for input tensor. expected " << numel << ", got " << tensor.numel();
diff --git a/torch/_inductor/codegen/cpp_wrapper_cpu.py b/torch/_inductor/codegen/cpp_wrapper_cpu.py
index b95772ebc4cd3..7123f3d0ed8d1 100644
--- a/torch/_inductor/codegen/cpp_wrapper_cpu.py
+++ b/torch/_inductor/codegen/cpp_wrapper_cpu.py
@@ -170,6 +170,8 @@ def write_header(self):
                 #include
                 """
             )
+            self.header.splice("typedef at::Half half;")
+            self.header.splice("typedef at::BFloat16 bfloat16;")
         else:
             self.header.splice(
                 """
diff --git a/torch/_inductor/codegen/cpp_wrapper_cuda.py b/torch/_inductor/codegen/cpp_wrapper_cuda.py
index 8eed428de07a9..bb6c508233c9d 100644
--- a/torch/_inductor/codegen/cpp_wrapper_cuda.py
+++ b/torch/_inductor/codegen/cpp_wrapper_cuda.py
@@ -43,8 +43,6 @@ def write_header(self):
         super().write_header()
 
         self.header.splice("#include ")
-        self.header.splice("typedef at::Half half;")
-        self.header.splice("typedef at::BFloat16 bfloat16;")
         if config.abi_compatible:
             self.header.splice(
                 "#include "
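
A minimal standalone sketch of why the emitted typedefs matter (the includes,
main(), and literal values below are illustrative assumptions, not the actual
generated wrapper): Inductor-generated CPU C++ spells the reduced-precision
dtypes as "half" and "bfloat16", while ATen/c10 define them as at::Half and
at::BFloat16, so without the typedefs the generated file fails to compile.

    // Sketch only: mirrors the typedefs the CPU wrapper header now emits.
    #include <c10/util/BFloat16.h>
    #include <c10/util/Half.h>

    typedef c10::Half half;          // wrapper emits "typedef at::Half half;"
    typedef c10::BFloat16 bfloat16;  // wrapper emits "typedef at::BFloat16 bfloat16;"

    int main() {
      bfloat16 x(1.5f);  // constructible from float, as generated kernels expect
      half y(0.25f);
      float sum = static_cast<float>(x) + static_cast<float>(y);
      return sum > 0.0f ? 0 : 1;
    }

Since CppWrapperCuda.write_header() calls super().write_header(), emitting the
typedefs from the CPU wrapper presumably covers the CUDA path as well, which is
why the duplicate splices are dropped there. The assert_numel() change is
independent: taking the expected count as uint64_t keeps both sides of the
comparison with tensor.numel() unsigned, silencing -Wsign-compare.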