From 4bbace892026a59eab1b353ce0097835fd521167 Mon Sep 17 00:00:00 2001 From: li_jinxin Date: Thu, 25 Apr 2024 17:14:16 +0800 Subject: [PATCH] [NPU]add ut coverage. --- .../unittests/test_index_sample_op_npu.py | 18 +++++ .../tests/unittests/test_meshgrid_op_npu.py | 19 +++++ .../tests/unittests/test_reduce_any_op_npu.py | 16 ++++ .../tests/unittests/test_reduce_sum_op_npu.py | 6 ++ .../npu/tests/unittests/test_swiglu_op_npu.py | 79 +++++++++++++++++-- .../npu/tests/unittests/test_tile_op_npu.py | 10 ++- .../tests/unittests/test_transpose_op_npu.py | 6 ++ 7 files changed, 146 insertions(+), 8 deletions(-) diff --git a/backends/npu/tests/unittests/test_index_sample_op_npu.py b/backends/npu/tests/unittests/test_index_sample_op_npu.py index 7ded3f831..db02dd856 100644 --- a/backends/npu/tests/unittests/test_index_sample_op_npu.py +++ b/backends/npu/tests/unittests/test_index_sample_op_npu.py @@ -144,6 +144,24 @@ def config(self): self.index_type = "int64" +class TestCase8(TestIndexSampleOp): + def config(self): + self.__class__.no_need_check_grad = True + self.x_shape = (1, 200) + self.dtype = "float32" + self.index_shape = (1, 200) + self.index_type = "int64" + + +class TestCase9(TestIndexSampleOp): + def config(self): + self.__class__.no_need_check_grad = True + self.x_shape = (1, 200) + self.dtype = "float32" + self.index_shape = (1, 200) + self.index_type = "int32" + + class TestIndexSampleShape(unittest.TestCase): def test_shape(self): paddle.enable_static() diff --git a/backends/npu/tests/unittests/test_meshgrid_op_npu.py b/backends/npu/tests/unittests/test_meshgrid_op_npu.py index dd033580c..56a38ec89 100644 --- a/backends/npu/tests/unittests/test_meshgrid_op_npu.py +++ b/backends/npu/tests/unittests/test_meshgrid_op_npu.py @@ -293,5 +293,24 @@ def test_api_with_dygraph_tuple_input(self): paddle.enable_static() +class TestMeshgridOp9(TestMeshgridOp): + def init_test_data(self): + self.shape = self.get_x_shape() + ins = [] + outs = [] + for i in range(len(self.shape)): + ins.append(np.random.random(self.shape[i]).astype(self.dtype)) + + for i in range(len(self.shape)): + out_reshape = [1] * len(self.shape) + out_reshape[i] = self.shape[i][0] if self.shape[i] else 1 + out_temp = np.reshape(ins[i], out_reshape) + outs.append(np.broadcast_to(out_temp, [100, 1])) + return ins, outs + + def get_x_shape(self): + return [[100], []] + + if __name__ == "__main__": unittest.main() diff --git a/backends/npu/tests/unittests/test_reduce_any_op_npu.py b/backends/npu/tests/unittests/test_reduce_any_op_npu.py index d2d615148..7f84f599e 100644 --- a/backends/npu/tests/unittests/test_reduce_any_op_npu.py +++ b/backends/npu/tests/unittests/test_reduce_any_op_npu.py @@ -113,5 +113,21 @@ def test_check_output(self): self.check_output_with_place(self.place) +class TestAnyOpWithInputDim(OpTest): + def setUp(self): + self.set_npu() + self.op_type = "reduce_any" + self.place = paddle.CustomPlace("npu", 0) + self.inputs = {"X": np.random.randint(0, 2, ()).astype("bool")} + self.attrs = {"dim": ()} + self.outputs = {"Out": self.inputs["X"]} + + def set_npu(self): + self.__class__.use_custom_device = True + + def test_check_output(self): + self.check_output_with_place(self.place) + + if __name__ == "__main__": unittest.main() diff --git a/backends/npu/tests/unittests/test_reduce_sum_op_npu.py b/backends/npu/tests/unittests/test_reduce_sum_op_npu.py index eecf10ee8..51739970f 100644 --- a/backends/npu/tests/unittests/test_reduce_sum_op_npu.py +++ b/backends/npu/tests/unittests/test_reduce_sum_op_npu.py @@ -89,6 
+89,12 @@ def initTestCase(self): self.axis = (-1, -2) +class TestReduceSumOp0D(TestReduceSum): + def initTestCase(self): + self.shape = () + self.axis = () + + @check_run_big_shape_test() class TestReduceSumOpRank1(TestReduceSum): def initTestCase(self): diff --git a/backends/npu/tests/unittests/test_swiglu_op_npu.py b/backends/npu/tests/unittests/test_swiglu_op_npu.py index 9dd89774c..c3fe5b263 100644 --- a/backends/npu/tests/unittests/test_swiglu_op_npu.py +++ b/backends/npu/tests/unittests/test_swiglu_op_npu.py @@ -121,9 +121,16 @@ def check_result(self, golden_res, fused_res): ) golden_y, golden_dx, golden_dy = golden_res fused_y, fused_dx, fused_dy = fused_res - np.testing.assert_allclose(golden_y, fused_y, rtol=rtol, atol=atol) - np.testing.assert_allclose(golden_dx, fused_dx, rtol=rtol, atol=atol) - np.testing.assert_allclose(golden_dy, fused_dy, rtol=rtol, atol=atol) + if golden_dx is None and fused_dx is None: + np.testing.assert_allclose(golden_y, fused_y, rtol=rtol, atol=atol) + np.testing.assert_allclose(golden_dy, fused_dy, rtol=rtol, atol=atol) + elif golden_dy is None and fused_dy is None: + np.testing.assert_allclose(golden_y, fused_y, rtol=rtol, atol=atol) + np.testing.assert_allclose(golden_dx, fused_dx, rtol=rtol, atol=atol) + else: + np.testing.assert_allclose(golden_y, fused_y, rtol=rtol, atol=atol) + np.testing.assert_allclose(golden_dx, fused_dx, rtol=rtol, atol=atol) + np.testing.assert_allclose(golden_dy, fused_dy, rtol=rtol, atol=atol) def gen_input(self): x = np.random.randn(self.shape[0], self.shape[1]) @@ -135,9 +142,13 @@ def golden_swiglu(self, x, y=None): res.backward() if self.dtype == "bfloat16": res = convert_uint16_to_float(res.numpy()) - dx = convert_uint16_to_float(x.grad.numpy()) - dy = convert_uint16_to_float(y.grad.numpy()) + dx = x.grad if x.grad is None else convert_uint16_to_float(x.grad.numpy()) + dy = y.grad if y.grad is None else convert_uint16_to_float(y.grad.numpy()) return res, dx, dy + if x.grad is None: + return res.numpy(), x.grad, y.grad.numpy() + if y.grad is None: + return res.numpy(), x.grad.numpy(), y.grad return res.numpy(), x.grad.numpy(), y.grad.numpy() def fused_swiglu(self, x, y): @@ -145,9 +156,13 @@ def fused_swiglu(self, x, y): res.backward() if self.dtype == "bfloat16": res = convert_uint16_to_float(res.numpy()) - dx = convert_uint16_to_float(x.grad.numpy()) - dy = convert_uint16_to_float(y.grad.numpy()) + dx = x.grad if x.grad is None else convert_uint16_to_float(x.grad.numpy()) + dy = y.grad if y.grad is None else convert_uint16_to_float(y.grad.numpy()) return res, dx, dy + if x.grad is None: + return res.numpy(), x.grad, y.grad.numpy() + if y.grad is None: + return res.numpy(), x.grad.numpy(), y.grad return res.numpy(), x.grad.numpy(), y.grad.numpy() @check_soc_version @@ -174,6 +189,56 @@ def test_swiglu(self): self.check_result(golden_res, fused_res) +class TestNPUSwigluOnlyX(TestNPUSwigluFP16BothXY): + @check_soc_version + def test_swiglu(self): + np_x, np_y = self.gen_input() + if self.dtype == "bfloat16": + np_x = convert_float_to_uint16(np_x) + np_y = convert_float_to_uint16(np_y) + golden_x = paddle.to_tensor( + np_x, place=self.npu_place, dtype=self.dtype, stop_gradient=True + ) + golden_y = paddle.to_tensor( + np_y, place=self.npu_place, dtype=self.dtype, stop_gradient=False + ) + fused_x = paddle.to_tensor( + np_x, place=self.npu_place, dtype=self.dtype, stop_gradient=True + ) + fused_y = paddle.to_tensor( + np_y, place=self.npu_place, dtype=self.dtype, stop_gradient=False + ) + + golden_res = 
self.golden_swiglu(golden_x, golden_y) + fused_res = self.fused_swiglu(fused_x, fused_y) + self.check_result(golden_res, fused_res) + + +class TestNPUSwigluOnlyY(TestNPUSwigluFP16BothXY): + @check_soc_version + def test_swiglu(self): + np_x, np_y = self.gen_input() + if self.dtype == "bfloat16": + np_x = convert_float_to_uint16(np_x) + np_y = convert_float_to_uint16(np_y) + golden_x = paddle.to_tensor( + np_x, place=self.npu_place, dtype=self.dtype, stop_gradient=False + ) + golden_y = paddle.to_tensor( + np_y, place=self.npu_place, dtype=self.dtype, stop_gradient=True + ) + fused_x = paddle.to_tensor( + np_x, place=self.npu_place, dtype=self.dtype, stop_gradient=False + ) + fused_y = paddle.to_tensor( + np_y, place=self.npu_place, dtype=self.dtype, stop_gradient=True + ) + + golden_res = self.golden_swiglu(golden_x, golden_y) + fused_res = self.fused_swiglu(fused_x, fused_y) + self.check_result(golden_res, fused_res) + + class TestNPUSwigluBF16OnlyX(TestNPUSwigluFP16OnlyX): def init_dtype(self): self.dtype = "bfloat16" diff --git a/backends/npu/tests/unittests/test_tile_op_npu.py b/backends/npu/tests/unittests/test_tile_op_npu.py index 3de5c4af7..66e034ba4 100755 --- a/backends/npu/tests/unittests/test_tile_op_npu.py +++ b/backends/npu/tests/unittests/test_tile_op_npu.py @@ -250,9 +250,12 @@ def test_check_output(self): # Situation 7: input x is Double class TestTileOpDouble(unittest.TestCase): def setUp(self): - self.input = np.random.randint(10, size=(2, 10, 5)).astype("double") + self.init_input() self.place = paddle.CustomPlace("npu", 0) + def init_input(self): + self.input = np.random.randint(10, size=(2, 10, 5)).astype("double") + def functional(self, place): input = paddle.to_tensor(self.input, place=place) input.stop_gradient = False @@ -274,6 +277,11 @@ def test_npu(self): np.testing.assert_allclose(grad_npu, grad_cpu) +class TestTileOpBool1(TestTileOpDouble): + def init_input(self): + self.input = np.random.randint(1, size=(2, 10, 5)).astype("bool") + + # Situation 8: input x is FP16 class TestTileOpFloat16(OpTest): def setUp(self): diff --git a/backends/npu/tests/unittests/test_transpose_op_npu.py b/backends/npu/tests/unittests/test_transpose_op_npu.py index 9f55a397e..80c20b9fb 100644 --- a/backends/npu/tests/unittests/test_transpose_op_npu.py +++ b/backends/npu/tests/unittests/test_transpose_op_npu.py @@ -111,6 +111,12 @@ def init_shape_axis(self): self.axis = (6, 1, 3, 5, 0, 2, 4, 7) +class TestCase10(TestTransposeOp): + def init_shape_axis(self): + self.shape = [] + self.axis = [] + + class TestTransposeOpFP16(TestTransposeOp): def init_dtype(self): self.dtype = np.float16
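Note: the new cases above mostly cover 0-D (scalar) inputs, one-sided-gradient swiglu, and the int32/int64 index paths on the custom NPU device. The standalone sketch below shows the kind of check the added TestCase9 (index_sample with an int32 index) performs; it assumes a PaddleCustomDevice build with the "npu" plugin installed, and the shapes match TestCase9 while the random values are illustrative rather than taken from the patch.

    import numpy as np
    import paddle

    place = paddle.CustomPlace("npu", 0)  # same placement the tests above use

    # Shapes mirror TestCase9: x is (1, 200) float32, index is (1, 200) int32.
    x_np = np.random.random((1, 200)).astype("float32")
    idx_np = np.random.randint(0, 200, size=(1, 200)).astype("int32")

    x = paddle.to_tensor(x_np, place=place)
    index = paddle.to_tensor(idx_np, place=place)

    # index_sample gathers x[i][index[i][j]] row by row, which matches
    # np.take_along_axis on axis 1, so NumPy serves as the golden reference.
    out = paddle.index_sample(x, index)
    ref = np.take_along_axis(x_np, idx_np, axis=1)
    np.testing.assert_allclose(out.numpy(), ref, rtol=1e-5)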