Skip to content

Commit

Permalink
[AOTI] Fix bfloat16 in CPU (pytorch#132150)
Browse files Browse the repository at this point in the history
Fixes pytorch#122986

- add "typedef at::BFloat16 bfloat16;" to the header of generated cpp file

- Suppress warning: comparison of integer expressions of different signedness: ‘long unsigned int’ and ‘int64_t’ {aka ‘long int’} [-Wsign-compare]
  436 |   if (tensor.numel() != numel) {

Pull Request resolved: pytorch#132150
Approved by: https://github.com/chenyang78, https://github.com/desertfire
  • Loading branch information
yushangdi authored and pytorchmergebot committed Jul 31, 2024
1 parent 6b28af1 commit a488113
Show file tree
Hide file tree
Showing 4 changed files with 3 additions and 8 deletions.
5 changes: 0 additions & 5 deletions test/inductor/test_aot_inductor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3252,11 +3252,6 @@ def fail_non_abi_compatible_cuda(is_skip=False):
"test_index_put_with_none_index": fail_minimal_arrayref_interface(is_skip=True),
# FIXME: failed with Segfault while exiting the Python runtime
"test_constant": fail_stack_allocation(is_skip=True),
# C++ compile error, need for aoti_torch___scaled_dot_product_flash_attention_for_cpu
# https://github.com/pytorch/pytorch/issues/122986
"test_sdpa": fail_with_and_without_stack_allocation(is_skip=True),
# The same issue as https://github.com/pytorch/pytorch/issues/122986
"test_sdpa_2": fail_with_and_without_stack_allocation(is_skip=True),
# Looks like the same issue as https://github.com/pytorch/pytorch/issues/122978
"test_shifted_constraint_ranges": fail_with_and_without_stack_allocation(
is_skip=True
Expand Down
2 changes: 1 addition & 1 deletion torch/_inductor/codegen/aoti_runtime/implementation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ void convert_handles_to_inputs(
}

template <typename T>
void assert_numel(const ArrayRefTensor<T>& tensor, int64_t numel) {
void assert_numel(const ArrayRefTensor<T>& tensor, uint64_t numel) {
if (tensor.numel() != numel) {
std::stringstream err;
err << "incorrect numel for input tensor. expected " << numel << ", got " << tensor.numel();
Expand Down
2 changes: 2 additions & 0 deletions torch/_inductor/codegen/cpp_wrapper_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ def write_header(self):
#include <torch/csrc/inductor/aoti_runtime/model.h>
"""
)
self.header.splice("typedef at::Half half;")
self.header.splice("typedef at::BFloat16 bfloat16;")
else:
self.header.splice(
"""
Expand Down
2 changes: 0 additions & 2 deletions torch/_inductor/codegen/cpp_wrapper_cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@ def write_header(self):
super().write_header()

self.header.splice("#include <filesystem>")
self.header.splice("typedef at::Half half;")
self.header.splice("typedef at::BFloat16 bfloat16;")
if config.abi_compatible:
self.header.splice(
"#include <torch/csrc/inductor/aoti_runtime/utils_cuda.h>"
Expand Down

0 comments on commit a488113

Please sign in to comment.