From b849ed6348752cad17a4fbeb01dcf7769574c04c Mon Sep 17 00:00:00 2001
From: Konstantin Schwarz <konstantin.schwarz@amd.com>
Date: Fri, 2 Aug 2024 23:11:51 +0100
Subject: [PATCH] [AIE2] Change scl2vec intrinsic shift amount type to unsigned
 int

This matches the type in the low-level intrinsic spec.
---
 clang/lib/Headers/aiev2_scl2vec.h             | 86 +++++++++++--------
 .../aie/aie2/aie2-scl2vec-intrinsic.cpp       | 48 +++++------
 2 files changed, 72 insertions(+), 62 deletions(-)

diff --git a/clang/lib/Headers/aiev2_scl2vec.h b/clang/lib/Headers/aiev2_scl2vec.h
index 8a9e767373ac..bc2ad84e20e7 100644
--- a/clang/lib/Headers/aiev2_scl2vec.h
+++ b/clang/lib/Headers/aiev2_scl2vec.h
@@ -11,128 +11,138 @@
 #ifndef __AIEV2_SCL2VEC_H__
 #define __AIEV2_SCL2VEC_H__
 
-INTRINSIC(v128int4) shiftx(v128int4 a, v128int4 b, int step, int shift) {
+INTRINSIC(v128int4)
+shiftx(v128int4 a, v128int4 b, int step, unsigned int shift) {
   return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
 }
-INTRINSIC(v64int8) shiftx(v64int8 a, v64int8 b, int step, int shift) {
+INTRINSIC(v64int8) shiftx(v64int8 a, v64int8 b, int step, unsigned int shift) {
   return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
 }
-INTRINSIC(v32int16) shiftx(v32int16 a, v32int16 b, int step, int shift) {
+INTRINSIC(v32int16)
+shiftx(v32int16 a, v32int16 b, int step, unsigned int shift) {
   return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
 }
-INTRINSIC(v16int32) shiftx(v16int32 a, v16int32 b, int step, int shift) {
+INTRINSIC(v16int32)
+shiftx(v16int32 a, v16int32 b, int step, unsigned int shift) {
   return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
 }
-INTRINSIC(v128uint4) shiftx(v128uint4 a, v128uint4 b, int step, int shift) {
+INTRINSIC(v128uint4)
+shiftx(v128uint4 a, v128uint4 b, int step, unsigned int shift) {
   return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
 }
-INTRINSIC(v64uint8) shiftx(v64uint8 a, v64uint8 b, int step, int shift) {
+INTRINSIC(v64uint8)
+shiftx(v64uint8 a, v64uint8 b, int step, unsigned int shift) {
   return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
 }
-INTRINSIC(v32uint16) shiftx(v32uint16 a, v32uint16 b, int step, int shift) {
+INTRINSIC(v32uint16)
+shiftx(v32uint16 a, v32uint16 b, int step, unsigned int shift) {
   return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
 }
-INTRINSIC(v16uint32) shiftx(v16uint32 a, v16uint32 b, int step, int shift) {
+INTRINSIC(v16uint32)
+shiftx(v16uint32 a, v16uint32 b, int step, unsigned int shift) {
   return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
 }
 #if 0
-INTRINSIC(v16cint16) shiftx(v16cint16 a, v16cint16 b, int step, int shift) {
+INTRINSIC(v16cint16) shiftx(v16cint16 a, v16cint16 b, int step, unsigned int shift) {
     return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
 }
-INTRINSIC(v8cint32) shiftx(v8cint32 a, v8cint32 b, int step, int shift) {
+INTRINSIC(v8cint32) shiftx(v8cint32 a, v8cint32 b, int step, unsigned int shift) {
     return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
 }
 #endif
 INTRINSIC(v32bfloat16)
-shiftx(v32bfloat16 a, v32bfloat16 b, int step, int shift) {
+shiftx(v32bfloat16 a, v32bfloat16 b, int step, unsigned int shift) {
   return __builtin_aiev2_vshift_bf512_bf512(a, b, step, shift);
 }
 INTRINSIC(v16accfloat)
-shiftx(v16accfloat a, v16accfloat b, int step, int shift) {
+shiftx(v16accfloat a, v16accfloat b, int step, unsigned int shift) {
   return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
 }
 
-INTRINSIC(v16float) shiftx(v16float a, v16float b, int step, int shift) {
-    return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
+INTRINSIC(v16float)
+shiftx(v16float a, v16float b, int step, unsigned int shift) {
+  return __builtin_aiev2_vshift_I512_I512(a, b, step, shift);
 }
 
-INTRINSIC(v128int4) shift_bytes(v128int4 a, v128int4 b, int shift) {
+INTRINSIC(v128int4) shift_bytes(v128int4 a, v128int4 b, unsigned int shift) {
   return shiftx(a, b, 0, shift);
 }
-INTRINSIC(v64int8) shift_bytes(v64int8 a, v64int8 b, int shift) {
+INTRINSIC(v64int8) shift_bytes(v64int8 a, v64int8 b, unsigned int shift) {
   return shiftx(a, b, 0, shift);
 }
-INTRINSIC(v32int16) shift_bytes(v32int16 a, v32int16 b, int shift) {
+INTRINSIC(v32int16) shift_bytes(v32int16 a, v32int16 b, unsigned int shift) {
   return shiftx(a, b, 0, shift);
 }
-INTRINSIC(v16int32) shift_bytes(v16int32 a, v16int32 b, int shift) {
+INTRINSIC(v16int32) shift_bytes(v16int32 a, v16int32 b, unsigned int shift) {
   return shiftx(a, b, 0, shift);
 }
-INTRINSIC(v128uint4) shift_bytes(v128uint4 a, v128uint4 b, int shift) {
+INTRINSIC(v128uint4) shift_bytes(v128uint4 a, v128uint4 b, unsigned int shift) {
   return shiftx(a, b, 0, shift);
 }
-INTRINSIC(v64uint8) shift_bytes(v64uint8 a, v64uint8 b, int shift) {
+INTRINSIC(v64uint8) shift_bytes(v64uint8 a, v64uint8 b, unsigned int shift) {
   return shiftx(a, b, 0, shift);
 }
-INTRINSIC(v32uint16) shift_bytes(v32uint16 a, v32uint16 b, int shift) {
+INTRINSIC(v32uint16) shift_bytes(v32uint16 a, v32uint16 b, unsigned int shift) {
   return shiftx(a, b, 0, shift);
 }
-INTRINSIC(v16uint32) shift_bytes(v16uint32 a, v16uint32 b, int shift) {
+INTRINSIC(v16uint32) shift_bytes(v16uint32 a, v16uint32 b, unsigned int shift) {
   return shiftx(a, b, 0, shift);
 }
 #if 0
-INTRINSIC(v16cint16) shift_bytes(v16cint16 a, v16cint16 b, int shift) {
+INTRINSIC(v16cint16) shift_bytes(v16cint16 a, v16cint16 b, unsigned int shift) {
   return shiftx(a, b, 0, shift);
 }
-INTRINSIC(v8cint32) shift_bytes(v8cint32 a, v8cint32 b, int shift) {
+INTRINSIC(v8cint32) shift_bytes(v8cint32 a, v8cint32 b, unsigned int shift) {
   return shiftx(a, b, 0, shift);
 }
 #endif
-INTRINSIC(v32bfloat16) shift_bytes(v32bfloat16 a, v32bfloat16 b, int shift) {
+INTRINSIC(v32bfloat16)
+shift_bytes(v32bfloat16 a, v32bfloat16 b, unsigned int shift) {
   return shiftx(a, b, 0, shift);
 }
-INTRINSIC(v16accfloat) shift_bytes(v16accfloat a, v16accfloat b, int shift) {
+INTRINSIC(v16accfloat)
+shift_bytes(v16accfloat a, v16accfloat b, unsigned int shift) {
   return shiftx(a, b, 0, shift);
 }
 
-INTRINSIC(v16float) shift_bytes(v16float a, v16float b, int shift) {
+INTRINSIC(v16float) shift_bytes(v16float a, v16float b, unsigned int shift) {
   return shiftx(a, b, 0, shift);
 }
 
-INTRINSIC(v64int8) shift(v64int8 a, v64int8 b, int shift) {
+INTRINSIC(v64int8) shift(v64int8 a, v64int8 b, unsigned int shift) {
   return shiftx(a, b, 0, shift * 1);
 }
-INTRINSIC(v32int16) shift(v32int16 a, v32int16 b, int shift) {
+INTRINSIC(v32int16) shift(v32int16 a, v32int16 b, unsigned int shift) {
   return shiftx(a, b, 0, shift * 2);
 }
-INTRINSIC(v16int32) shift(v16int32 a, v16int32 b, int shift) {
+INTRINSIC(v16int32) shift(v16int32 a, v16int32 b, unsigned int shift) {
   return shiftx(a, b, 0, shift * 4);
 }
-INTRINSIC(v64uint8) shift(v64uint8 a, v64uint8 b, int shift) {
+INTRINSIC(v64uint8) shift(v64uint8 a, v64uint8 b, unsigned int shift) {
   return shiftx(a, b, 0, shift * 1);
 }
-INTRINSIC(v32uint16) shift(v32uint16 a, v32uint16 b, int shift) {
+INTRINSIC(v32uint16) shift(v32uint16 a, v32uint16 b, unsigned int shift) {
   return shiftx(a, b, 0, shift * 2);
 }
-INTRINSIC(v16uint32) shift(v16uint32 a, v16uint32 b, int shift) {
+INTRINSIC(v16uint32) shift(v16uint32 a, v16uint32 b, unsigned int shift) {
   return shiftx(a, b, 0, shift * 4);
 }
 #if 0
-INTRINSIC(v16cint16) shift(v16cint16 a, v16cint16 b, int shift) {
+INTRINSIC(v16cint16) shift(v16cint16 a, v16cint16 b, unsigned int shift) {
   return shiftx(a, b, 0, shift * 4);
 }
-INTRINSIC(v8cint32) shift(v8cint32 a, v8cint32 b, int shift) {
+INTRINSIC(v8cint32) shift(v8cint32 a, v8cint32 b, unsigned int shift) {
   return shiftx(a, b, 0, shift * 8);
 }
 #endif
-INTRINSIC(v32bfloat16) shift(v32bfloat16 a, v32bfloat16 b, int shift) {
+INTRINSIC(v32bfloat16) shift(v32bfloat16 a, v32bfloat16 b, unsigned int shift) {
   return shiftx(a, b, 0, shift * 2);
 }
-INTRINSIC(v16accfloat) shift(v16accfloat a, v16accfloat b, int shift) {
+INTRINSIC(v16accfloat) shift(v16accfloat a, v16accfloat b, unsigned int shift) {
   return shiftx(a, b, 0, shift * 4);
 }
 
-INTRINSIC(v16float) shift(v16float a, v16float b, int shift) {
+INTRINSIC(v16float) shift(v16float a, v16float b, unsigned int shift) {
   return shiftx(a, b, 0, shift * 4);
 }
 
diff --git a/clang/test/CodeGen/aie/aie2/aie2-scl2vec-intrinsic.cpp b/clang/test/CodeGen/aie/aie2/aie2-scl2vec-intrinsic.cpp
index e5c7cc532c35..df939e418d77 100644
--- a/clang/test/CodeGen/aie/aie2/aie2-scl2vec-intrinsic.cpp
+++ b/clang/test/CodeGen/aie/aie2/aie2-scl2vec-intrinsic.cpp
@@ -11,16 +11,16 @@
 // RUN: %clang -O2 %s --target=aie2 -nostdlibinc -S -emit-llvm -o - | FileCheck %s
 
 
-// CHECK-LABEL: @_Z11test_shiftxDv16_iS_ii(
+// CHECK-LABEL: @_Z11test_shiftxDv16_iS_ij(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], i32 [[STEP:%.*]], i32 [[SHIFT:%.*]])
 // CHECK-NEXT:    ret <16 x i32> [[TMP0]]
 //
-v16int32 test_shiftx(v16int32 a, v16int32 b, int step, int shift) {
+v16int32 test_shiftx(v16int32 a, v16int32 b, int step, unsigned int shift) {
   return shiftx(a,b,step,shift);
 }
 
-// CHECK-LABEL: @_Z11test_shiftxDv32_tS_ii(
+// CHECK-LABEL: @_Z11test_shiftxDv32_tS_ij(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <32 x i16> [[A:%.*]] to <16 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <32 x i16> [[B:%.*]] to <16 x i32>
@@ -28,11 +28,11 @@ v16int32 test_shiftx(v16int32 a, v16int32 b, int step, int shift) {
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <32 x i16>
 // CHECK-NEXT:    ret <32 x i16> [[TMP3]]
 //
-v32uint16 test_shiftx(v32uint16 a, v32uint16 b, int step, int shift) {
+v32uint16 test_shiftx(v32uint16 a, v32uint16 b, int step, unsigned int shift) {
   return shiftx(a,b,step,shift);
 }
 
-// CHECK-LABEL: @_Z16test_shift_bytesDv64_aS_i(
+// CHECK-LABEL: @_Z16test_shift_bytesDv64_aS_j(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <64 x i8> [[A:%.*]] to <16 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <64 x i8> [[B:%.*]] to <16 x i32>
@@ -40,11 +40,11 @@ v32uint16 test_shiftx(v32uint16 a, v32uint16 b, int step, int shift) {
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <64 x i8>
 // CHECK-NEXT:    ret <64 x i8> [[TMP3]]
 //
-v64int8 test_shift_bytes(v64int8 a, v64int8 b, int shift) {
+v64int8 test_shift_bytes(v64int8 a, v64int8 b, unsigned int shift) {
   return shift_bytes(a,b,shift);
 }
 
-// CHECK-LABEL: @_Z10test_shiftDv64_hS_i(
+// CHECK-LABEL: @_Z10test_shiftDv64_hS_j(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <64 x i8> [[A:%.*]] to <16 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <64 x i8> [[B:%.*]] to <16 x i32>
@@ -52,7 +52,7 @@ v64int8 test_shift_bytes(v64int8 a, v64int8 b, int shift) {
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <64 x i8>
 // CHECK-NEXT:    ret <64 x i8> [[TMP3]]
 //
-v64uint8 test_shift(v64uint8 a, v64uint8 b, int shift_by) {
+v64uint8 test_shift(v64uint8 a, v64uint8 b, unsigned int shift_by) {
    return shift(a,b,shift_by);
 }
 
@@ -1117,7 +1117,7 @@ unsigned long long test_ext_u64(v16int32 v, int idx, int sign) {
 
 /* Test Intrinsic using ACCFLOAT type */
 
-// CHECK-LABEL: @_Z11test_shiftxDv16_u10__accfloatS_ii(
+// CHECK-LABEL: @_Z11test_shiftxDv16_u10__accfloatS_ij(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
@@ -1125,11 +1125,11 @@ unsigned long long test_ext_u64(v16int32 v, int idx, int sign) {
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64>
 // CHECK-NEXT:    ret <8 x i64> [[TMP3]]
 //
-v16accfloat test_shiftx(v16accfloat a, v16accfloat b, int step, int shift) {
+v16accfloat test_shiftx(v16accfloat a, v16accfloat b, int step, unsigned int shift) {
   return shiftx(a,b,step,shift);
 }
 
-// CHECK-LABEL: @_Z16test_shift_bytesDv16_u10__accfloatS_i(
+// CHECK-LABEL: @_Z16test_shift_bytesDv16_u10__accfloatS_j(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
@@ -1137,20 +1137,20 @@ v16accfloat test_shiftx(v16accfloat a, v16accfloat b, int step, int shift) {
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64>
 // CHECK-NEXT:    ret <8 x i64> [[TMP3]]
 //
-v16accfloat test_shift_bytes(v16accfloat a, v16accfloat b, int shift) {
+v16accfloat test_shift_bytes(v16accfloat a, v16accfloat b, unsigned int shift) {
   return shift_bytes(a,b,shift);
 }
 
-// CHECK-LABEL: @_Z10test_shiftDv16_u10__accfloatS_i(
+// CHECK-LABEL: @_Z10test_shiftDv16_u10__accfloatS_j(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[MUL_I:%.*]] = shl nsw i32 [[SHIFT_BY:%.*]], 2
+// CHECK-NEXT:    [[MUL_I:%.*]] = shl i32 [[SHIFT_BY:%.*]], 2
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP0]], <16 x i32> [[TMP1]], i32 0, i32 [[MUL_I]])
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64>
 // CHECK-NEXT:    ret <8 x i64> [[TMP3]]
 //
-v16accfloat test_shift(v16accfloat a, v16accfloat b, int shift_by) {
+v16accfloat test_shift(v16accfloat a, v16accfloat b, unsigned int shift_by) {
   return shift(a, b, shift_by);
 }
 
@@ -1176,12 +1176,12 @@ v16float test_broadcast_to_v16float (float b) {
    return  broadcast_to_v16float(b);
 }
 
-// CHECK-LABEL: @_Z11test_shiftxDv32_u6__bf16S_ii(
+// CHECK-LABEL: @_Z11test_shiftxDv32_u6__bf16S_ij(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2.vshift.bf512.bf512(<32 x bfloat> [[A:%.*]], <32 x bfloat> [[B:%.*]], i32 [[STEP:%.*]], i32 [[SHIFT:%.*]])
 // CHECK-NEXT:    ret <32 x bfloat> [[TMP0]]
 //
-v32bfloat16 test_shiftx(v32bfloat16 a, v32bfloat16 b, int step, int shift) {
+v32bfloat16 test_shiftx(v32bfloat16 a, v32bfloat16 b, int step, unsigned int shift) {
     return shiftx(a, b, step, shift);
 }
 
@@ -1358,7 +1358,7 @@ v16float test_shuffle(v16float a, unsigned int mode) {
   return shuffle(a, mode);
 }
 
-// CHECK-LABEL: @_Z11test_shiftxDv16_fS_ii(
+// CHECK-LABEL: @_Z11test_shiftxDv16_fS_ij(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x float> [[A:%.*]] to <16 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x float> [[B:%.*]] to <16 x i32>
@@ -1366,11 +1366,11 @@ v16float test_shuffle(v16float a, unsigned int mode) {
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <16 x float>
 // CHECK-NEXT:    ret <16 x float> [[TMP3]]
 //
-v16float test_shiftx(v16float a, v16float b, int step, int shift) {
+v16float test_shiftx(v16float a, v16float b, int step, unsigned int shift) {
   return shiftx(a,b,step,shift);
 }
 
-// CHECK-LABEL: @_Z16test_shift_bytesDv16_fS_i(
+// CHECK-LABEL: @_Z16test_shift_bytesDv16_fS_j(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x float> [[A:%.*]] to <16 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x float> [[B:%.*]] to <16 x i32>
@@ -1378,20 +1378,20 @@ v16float test_shiftx(v16float a, v16float b, int step, int shift) {
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <16 x float>
 // CHECK-NEXT:    ret <16 x float> [[TMP3]]
 //
-v16float test_shift_bytes(v16float a, v16float b, int shift) {
+v16float test_shift_bytes(v16float a, v16float b, unsigned int shift) {
   return shift_bytes(a, b, shift);
 }
 
-// CHECK-LABEL: @_Z10test_shiftDv16_fS_i(
+// CHECK-LABEL: @_Z10test_shiftDv16_fS_j(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[MUL_I:%.*]] = shl nsw i32 [[SHIFT_BY:%.*]], 2
+// CHECK-NEXT:    [[MUL_I:%.*]] = shl i32 [[SHIFT_BY:%.*]], 2
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x float> [[A:%.*]] to <16 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x float> [[B:%.*]] to <16 x i32>
 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP0]], <16 x i32> [[TMP1]], i32 0, i32 [[MUL_I]])
 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <16 x float>
 // CHECK-NEXT:    ret <16 x float> [[TMP3]]
 //
-v16float test_shift(v16float a, v16float b, int shift_by) {
+v16float test_shift(v16float a, v16float b, unsigned int shift_by) {
   return shift(a, b, shift_by);
 }