
Commit

typo fix
LeiWang1999 committed Aug 5, 2024
1 parent af16059 commit ac316fd
Showing 3 changed files with 3 additions and 3 deletions.
2 changes: 1 addition & 1 deletion integration/pytorch/bitblas_quant_linear.py
@@ -182,7 +182,7 @@ def pack(self, linear, scales, zeros=None):
(w[:, idx] + scale_zeros[:, g_idx]) / scales[:, g_idx]).to(torch.int)[:, None])
intweight = torch.cat(intweight, dim=1)
intweight = intweight.contiguous()
-   intweight = intweight.cpu().astype(np.int8)
+   intweight = intweight.cpu().to(torch.int8)
# quantize to 4bit
qw_np = general_compress(intweight, source_bits=self.bits, storage_dtype=np.int8)
# do interleave for fast type conversion
2 changes: 1 addition & 1 deletion testing/python/module/test_bitblas_linear.py
@@ -98,7 +98,7 @@ def correctness_weight_only_dequantize(
inputs.append(torch.rand(output_shape, dtype=torch.float16).cuda())

intweight = inputs[1]
-   intweight = intweight.cpu().astype(np.int8)
+   intweight = intweight.cpu().to(torch.int8)
if source_format == "int":
intweight = intweight + maxq
if with_zeros:
2 changes: 1 addition & 1 deletion testing/python/operators/test_general_matmul_ops.py
@@ -155,7 +155,7 @@ def matmul_torch_forward(M, N, K, A_dtype, W_dtype, accum_dtype, out_dtype, layo
inputs.append(torch.rand(output_shape, dtype=torch.float16).cuda())

intweight = inputs[1]
-   intweight = intweight.cpu().astype(np.int8)
+   intweight = intweight.cpu().to(torch.int8)
if source_format == "int":
intweight = intweight + maxq
if with_zeros:
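For context, a minimal standalone sketch of why the same one-line change is applied in all three files (the random tensor below is only a stand-in for the packed integer weights, not the actual data): torch.Tensor has no .astype() method, which is part of the NumPy API, so the old line would raise an AttributeError at runtime; dtype casts on tensors are done with .to().

import torch

intweight = torch.randint(0, 16, (4, 8))        # stand-in for the integer weight tensor
# intweight = intweight.cpu().astype(np.int8)   # AttributeError: 'Tensor' object has no attribute 'astype'
intweight = intweight.cpu().to(torch.int8)      # correct torch-native cast to int8
print(intweight.dtype)                          # torch.int8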
