Skip to content

Commit

Permalink
.
Browse files Browse the repository at this point in the history
  • Loading branch information
[email protected] committed Nov 4, 2024
1 parent 7ecdd12 commit 8a195cd
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/flag_gems/ops/max.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ def max_kernel(

def max(inp):
logging.debug("GEMS MAX")
if not inp.is_contiguous():
inp = inp.contiguous()
M = inp.numel()
block_size = triton.next_power_of_2(math.ceil(math.sqrt(M)))
mid_size = triton.cdiv(M, block_size)
Expand Down
12 changes: 12 additions & 0 deletions tests/test_general_reduction_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,18 @@ def test_accuracy_max_without_dim(shape, dtype):

gems_assert_equal(res_out, ref_out)

@pytest.mark.max
@pytest.mark.parametrize("shape", REDUCTION_SHAPES)
@pytest.mark.parametrize("dtype", FLOAT_DTYPES)
def test_accuracy_max_without_dim_uncontiguous(shape, dtype):
    """Full-tensor max must match the reference even for non-contiguous input."""
    # Slicing with step 2 along dim 0 yields a strided (non-contiguous) view.
    base = torch.randn(shape, dtype=dtype, device="cuda")
    inp = base[::2]
    ref_inp = to_reference(inp)

    ref_out = torch.max(ref_inp)
    with flag_gems.use_gems():
        res_out = torch.max(inp)

    gems_assert_equal(res_out, ref_out)

# TODO: failed at (200, 40999, 3), while it succeeded at this shape in mean_dim
@pytest.mark.max
Expand Down

0 comments on commit 8a195cd

Please sign in to comment.