From b849ed6348752cad17a4fbeb01dcf7769574c04c Mon Sep 17 00:00:00 2001 From: Konstantin Schwarz Date: Fri, 2 Aug 2024 23:11:51 +0100 Subject: [PATCH] [AIE2] Change scl2vec intrinsic shift amount type to unsigned int This matches the type in the low level intrinsic spec --- clang/lib/Headers/aiev2_scl2vec.h | 86 +++++++++++-------- .../aie/aie2/aie2-scl2vec-intrinsic.cpp | 48 +++++------ 2 files changed, 72 insertions(+), 62 deletions(-) diff --git a/clang/lib/Headers/aiev2_scl2vec.h b/clang/lib/Headers/aiev2_scl2vec.h index 8a9e767373ac..bc2ad84e20e7 100644 --- a/clang/lib/Headers/aiev2_scl2vec.h +++ b/clang/lib/Headers/aiev2_scl2vec.h @@ -11,128 +11,138 @@ #ifndef __AIEV2_SCL2VEC_H__ #define __AIEV2_SCL2VEC_H__ -INTRINSIC(v128int4) shiftx(v128int4 a, v128int4 b, int step, int shift) { +INTRINSIC(v128int4) +shiftx(v128int4 a, v128int4 b, int step, unsigned int shift) { return __builtin_aiev2_vshift_I512_I512(a, b, step, shift); } -INTRINSIC(v64int8) shiftx(v64int8 a, v64int8 b, int step, int shift) { +INTRINSIC(v64int8) shiftx(v64int8 a, v64int8 b, int step, unsigned int shift) { return __builtin_aiev2_vshift_I512_I512(a, b, step, shift); } -INTRINSIC(v32int16) shiftx(v32int16 a, v32int16 b, int step, int shift) { +INTRINSIC(v32int16) +shiftx(v32int16 a, v32int16 b, int step, unsigned int shift) { return __builtin_aiev2_vshift_I512_I512(a, b, step, shift); } -INTRINSIC(v16int32) shiftx(v16int32 a, v16int32 b, int step, int shift) { +INTRINSIC(v16int32) +shiftx(v16int32 a, v16int32 b, int step, unsigned int shift) { return __builtin_aiev2_vshift_I512_I512(a, b, step, shift); } -INTRINSIC(v128uint4) shiftx(v128uint4 a, v128uint4 b, int step, int shift) { +INTRINSIC(v128uint4) +shiftx(v128uint4 a, v128uint4 b, int step, unsigned int shift) { return __builtin_aiev2_vshift_I512_I512(a, b, step, shift); } -INTRINSIC(v64uint8) shiftx(v64uint8 a, v64uint8 b, int step, int shift) { +INTRINSIC(v64uint8) +shiftx(v64uint8 a, v64uint8 b, int step, unsigned int shift) { return __builtin_aiev2_vshift_I512_I512(a, b, step, shift); } -INTRINSIC(v32uint16) shiftx(v32uint16 a, v32uint16 b, int step, int shift) { +INTRINSIC(v32uint16) +shiftx(v32uint16 a, v32uint16 b, int step, unsigned int shift) { return __builtin_aiev2_vshift_I512_I512(a, b, step, shift); } -INTRINSIC(v16uint32) shiftx(v16uint32 a, v16uint32 b, int step, int shift) { +INTRINSIC(v16uint32) +shiftx(v16uint32 a, v16uint32 b, int step, unsigned int shift) { return __builtin_aiev2_vshift_I512_I512(a, b, step, shift); } #if 0 -INTRINSIC(v16cint16) shiftx(v16cint16 a, v16cint16 b, int step, int shift) { +INTRINSIC(v16cint16) shiftx(v16cint16 a, v16cint16 b, int step, unsigned int shift) { return __builtin_aiev2_vshift_I512_I512(a, b, step, shift); } -INTRINSIC(v8cint32) shiftx(v8cint32 a, v8cint32 b, int step, int shift) { +INTRINSIC(v8cint32) shiftx(v8cint32 a, v8cint32 b, int step, unsigned int shift) { return __builtin_aiev2_vshift_I512_I512(a, b, step, shift); } #endif INTRINSIC(v32bfloat16) -shiftx(v32bfloat16 a, v32bfloat16 b, int step, int shift) { +shiftx(v32bfloat16 a, v32bfloat16 b, int step, unsigned int shift) { return __builtin_aiev2_vshift_bf512_bf512(a, b, step, shift); } INTRINSIC(v16accfloat) -shiftx(v16accfloat a, v16accfloat b, int step, int shift) { +shiftx(v16accfloat a, v16accfloat b, int step, unsigned int shift) { return __builtin_aiev2_vshift_I512_I512(a, b, step, shift); } -INTRINSIC(v16float) shiftx(v16float a, v16float b, int step, int shift) { - return __builtin_aiev2_vshift_I512_I512(a, b, step, shift); +INTRINSIC(v16float) +shiftx(v16float a, v16float b, int step, unsigned int shift) { + return __builtin_aiev2_vshift_I512_I512(a, b, step, shift); } -INTRINSIC(v128int4) shift_bytes(v128int4 a, v128int4 b, int shift) { +INTRINSIC(v128int4) shift_bytes(v128int4 a, v128int4 b, unsigned int shift) { return shiftx(a, b, 0, shift); } -INTRINSIC(v64int8) shift_bytes(v64int8 a, v64int8 b, int shift) { +INTRINSIC(v64int8) shift_bytes(v64int8 a, v64int8 b, unsigned int shift) { return shiftx(a, b, 0, shift); } -INTRINSIC(v32int16) shift_bytes(v32int16 a, v32int16 b, int shift) { +INTRINSIC(v32int16) shift_bytes(v32int16 a, v32int16 b, unsigned int shift) { return shiftx(a, b, 0, shift); } -INTRINSIC(v16int32) shift_bytes(v16int32 a, v16int32 b, int shift) { +INTRINSIC(v16int32) shift_bytes(v16int32 a, v16int32 b, unsigned int shift) { return shiftx(a, b, 0, shift); } -INTRINSIC(v128uint4) shift_bytes(v128uint4 a, v128uint4 b, int shift) { +INTRINSIC(v128uint4) shift_bytes(v128uint4 a, v128uint4 b, unsigned int shift) { return shiftx(a, b, 0, shift); } -INTRINSIC(v64uint8) shift_bytes(v64uint8 a, v64uint8 b, int shift) { +INTRINSIC(v64uint8) shift_bytes(v64uint8 a, v64uint8 b, unsigned int shift) { return shiftx(a, b, 0, shift); } -INTRINSIC(v32uint16) shift_bytes(v32uint16 a, v32uint16 b, int shift) { +INTRINSIC(v32uint16) shift_bytes(v32uint16 a, v32uint16 b, unsigned int shift) { return shiftx(a, b, 0, shift); } -INTRINSIC(v16uint32) shift_bytes(v16uint32 a, v16uint32 b, int shift) { +INTRINSIC(v16uint32) shift_bytes(v16uint32 a, v16uint32 b, unsigned int shift) { return shiftx(a, b, 0, shift); } #if 0 -INTRINSIC(v16cint16) shift_bytes(v16cint16 a, v16cint16 b, int shift) { +INTRINSIC(v16cint16) shift_bytes(v16cint16 a, v16cint16 b, unsigned int shift) { return shiftx(a, b, 0, shift); } -INTRINSIC(v8cint32) shift_bytes(v8cint32 a, v8cint32 b, int shift) { +INTRINSIC(v8cint32) shift_bytes(v8cint32 a, v8cint32 b, unsigned int shift) { return shiftx(a, b, 0, shift); } #endif -INTRINSIC(v32bfloat16) shift_bytes(v32bfloat16 a, v32bfloat16 b, int shift) { +INTRINSIC(v32bfloat16) +shift_bytes(v32bfloat16 a, v32bfloat16 b, unsigned int shift) { return shiftx(a, b, 0, shift); } -INTRINSIC(v16accfloat) shift_bytes(v16accfloat a, v16accfloat b, int shift) { +INTRINSIC(v16accfloat) +shift_bytes(v16accfloat a, v16accfloat b, unsigned int shift) { return shiftx(a, b, 0, shift); } -INTRINSIC(v16float) shift_bytes(v16float a, v16float b, int shift) { +INTRINSIC(v16float) shift_bytes(v16float a, v16float b, unsigned int shift) { return shiftx(a, b, 0, shift); } -INTRINSIC(v64int8) shift(v64int8 a, v64int8 b, int shift) { +INTRINSIC(v64int8) shift(v64int8 a, v64int8 b, unsigned int shift) { return shiftx(a, b, 0, shift * 1); } -INTRINSIC(v32int16) shift(v32int16 a, v32int16 b, int shift) { +INTRINSIC(v32int16) shift(v32int16 a, v32int16 b, unsigned int shift) { return shiftx(a, b, 0, shift * 2); } -INTRINSIC(v16int32) shift(v16int32 a, v16int32 b, int shift) { +INTRINSIC(v16int32) shift(v16int32 a, v16int32 b, unsigned int shift) { return shiftx(a, b, 0, shift * 4); } -INTRINSIC(v64uint8) shift(v64uint8 a, v64uint8 b, int shift) { +INTRINSIC(v64uint8) shift(v64uint8 a, v64uint8 b, unsigned int shift) { return shiftx(a, b, 0, shift * 1); } -INTRINSIC(v32uint16) shift(v32uint16 a, v32uint16 b, int shift) { +INTRINSIC(v32uint16) shift(v32uint16 a, v32uint16 b, unsigned int shift) { return shiftx(a, b, 0, shift * 2); } -INTRINSIC(v16uint32) shift(v16uint32 a, v16uint32 b, int shift) { +INTRINSIC(v16uint32) shift(v16uint32 a, v16uint32 b, unsigned int shift) { return shiftx(a, b, 0, shift * 4); } #if 0 -INTRINSIC(v16cint16) shift(v16cint16 a, v16cint16 b, int shift) { +INTRINSIC(v16cint16) shift(v16cint16 a, v16cint16 b, unsigned int shift) { return shiftx(a, b, 0, shift * 4); } -INTRINSIC(v8cint32) shift(v8cint32 a, v8cint32 b, int shift) { +INTRINSIC(v8cint32) shift(v8cint32 a, v8cint32 b, unsigned int shift) { return shiftx(a, b, 0, shift * 8); } #endif -INTRINSIC(v32bfloat16) shift(v32bfloat16 a, v32bfloat16 b, int shift) { +INTRINSIC(v32bfloat16) shift(v32bfloat16 a, v32bfloat16 b, unsigned int shift) { return shiftx(a, b, 0, shift * 2); } -INTRINSIC(v16accfloat) shift(v16accfloat a, v16accfloat b, int shift) { +INTRINSIC(v16accfloat) shift(v16accfloat a, v16accfloat b, unsigned int shift) { return shiftx(a, b, 0, shift * 4); } -INTRINSIC(v16float) shift(v16float a, v16float b, int shift) { +INTRINSIC(v16float) shift(v16float a, v16float b, unsigned int shift) { return shiftx(a, b, 0, shift * 4); } diff --git a/clang/test/CodeGen/aie/aie2/aie2-scl2vec-intrinsic.cpp b/clang/test/CodeGen/aie/aie2/aie2-scl2vec-intrinsic.cpp index e5c7cc532c35..df939e418d77 100644 --- a/clang/test/CodeGen/aie/aie2/aie2-scl2vec-intrinsic.cpp +++ b/clang/test/CodeGen/aie/aie2/aie2-scl2vec-intrinsic.cpp @@ -11,16 +11,16 @@ // RUN: %clang -O2 %s --target=aie2 -nostdlibinc -S -emit-llvm -o - | FileCheck %s -// CHECK-LABEL: @_Z11test_shiftxDv16_iS_ii( +// CHECK-LABEL: @_Z11test_shiftxDv16_iS_ij( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], i32 [[STEP:%.*]], i32 [[SHIFT:%.*]]) // CHECK-NEXT: ret <16 x i32> [[TMP0]] // -v16int32 test_shiftx(v16int32 a, v16int32 b, int step, int shift) { +v16int32 test_shiftx(v16int32 a, v16int32 b, int step, unsigned int shift) { return shiftx(a,b,step,shift); } -// CHECK-LABEL: @_Z11test_shiftxDv32_tS_ii( +// CHECK-LABEL: @_Z11test_shiftxDv32_tS_ij( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <32 x i16> [[A:%.*]] to <16 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <32 x i16> [[B:%.*]] to <16 x i32> @@ -28,11 +28,11 @@ v16int32 test_shiftx(v16int32 a, v16int32 b, int step, int shift) { // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <32 x i16> // CHECK-NEXT: ret <32 x i16> [[TMP3]] // -v32uint16 test_shiftx(v32uint16 a, v32uint16 b, int step, int shift) { +v32uint16 test_shiftx(v32uint16 a, v32uint16 b, int step, unsigned int shift) { return shiftx(a,b,step,shift); } -// CHECK-LABEL: @_Z16test_shift_bytesDv64_aS_i( +// CHECK-LABEL: @_Z16test_shift_bytesDv64_aS_j( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[A:%.*]] to <16 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <64 x i8> [[B:%.*]] to <16 x i32> @@ -40,11 +40,11 @@ v32uint16 test_shiftx(v32uint16 a, v32uint16 b, int step, int shift) { // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <64 x i8> // CHECK-NEXT: ret <64 x i8> [[TMP3]] // -v64int8 test_shift_bytes(v64int8 a, v64int8 b, int shift) { +v64int8 test_shift_bytes(v64int8 a, v64int8 b, unsigned int shift) { return shift_bytes(a,b,shift); } -// CHECK-LABEL: @_Z10test_shiftDv64_hS_i( +// CHECK-LABEL: @_Z10test_shiftDv64_hS_j( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <64 x i8> [[A:%.*]] to <16 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <64 x i8> [[B:%.*]] to <16 x i32> @@ -52,7 +52,7 @@ v64int8 test_shift_bytes(v64int8 a, v64int8 b, int shift) { // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <64 x i8> // CHECK-NEXT: ret <64 x i8> [[TMP3]] // -v64uint8 test_shift(v64uint8 a, v64uint8 b, int shift_by) { +v64uint8 test_shift(v64uint8 a, v64uint8 b, unsigned int shift_by) { return shift(a,b,shift_by); } @@ -1117,7 +1117,7 @@ unsigned long long test_ext_u64(v16int32 v, int idx, int sign) { /* Test Intrinsic using ACCFLOAT type */ -// CHECK-LABEL: @_Z11test_shiftxDv16_u10__accfloatS_ii( +// CHECK-LABEL: @_Z11test_shiftxDv16_u10__accfloatS_ij( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32> @@ -1125,11 +1125,11 @@ unsigned long long test_ext_u64(v16int32 v, int idx, int sign) { // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> // CHECK-NEXT: ret <8 x i64> [[TMP3]] // -v16accfloat test_shiftx(v16accfloat a, v16accfloat b, int step, int shift) { +v16accfloat test_shiftx(v16accfloat a, v16accfloat b, int step, unsigned int shift) { return shiftx(a,b,step,shift); } -// CHECK-LABEL: @_Z16test_shift_bytesDv16_u10__accfloatS_i( +// CHECK-LABEL: @_Z16test_shift_bytesDv16_u10__accfloatS_j( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32> @@ -1137,20 +1137,20 @@ v16accfloat test_shiftx(v16accfloat a, v16accfloat b, int step, int shift) { // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> // CHECK-NEXT: ret <8 x i64> [[TMP3]] // -v16accfloat test_shift_bytes(v16accfloat a, v16accfloat b, int shift) { +v16accfloat test_shift_bytes(v16accfloat a, v16accfloat b, unsigned int shift) { return shift_bytes(a,b,shift); } -// CHECK-LABEL: @_Z10test_shiftDv16_u10__accfloatS_i( +// CHECK-LABEL: @_Z10test_shiftDv16_u10__accfloatS_j( // CHECK-NEXT: entry: -// CHECK-NEXT: [[MUL_I:%.*]] = shl nsw i32 [[SHIFT_BY:%.*]], 2 +// CHECK-NEXT: [[MUL_I:%.*]] = shl i32 [[SHIFT_BY:%.*]], 2 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32> // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP0]], <16 x i32> [[TMP1]], i32 0, i32 [[MUL_I]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> // CHECK-NEXT: ret <8 x i64> [[TMP3]] // -v16accfloat test_shift(v16accfloat a, v16accfloat b, int shift_by) { +v16accfloat test_shift(v16accfloat a, v16accfloat b, unsigned int shift_by) { return shift(a, b, shift_by); } @@ -1176,12 +1176,12 @@ v16float test_broadcast_to_v16float (float b) { return broadcast_to_v16float(b); } -// CHECK-LABEL: @_Z11test_shiftxDv32_u6__bf16S_ii( +// CHECK-LABEL: @_Z11test_shiftxDv32_u6__bf16S_ij( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call noundef <32 x bfloat> @llvm.aie2.vshift.bf512.bf512(<32 x bfloat> [[A:%.*]], <32 x bfloat> [[B:%.*]], i32 [[STEP:%.*]], i32 [[SHIFT:%.*]]) // CHECK-NEXT: ret <32 x bfloat> [[TMP0]] // -v32bfloat16 test_shiftx(v32bfloat16 a, v32bfloat16 b, int step, int shift) { +v32bfloat16 test_shiftx(v32bfloat16 a, v32bfloat16 b, int step, unsigned int shift) { return shiftx(a, b, step, shift); } @@ -1358,7 +1358,7 @@ v16float test_shuffle(v16float a, unsigned int mode) { return shuffle(a, mode); } -// CHECK-LABEL: @_Z11test_shiftxDv16_fS_ii( +// CHECK-LABEL: @_Z11test_shiftxDv16_fS_ij( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x float> [[A:%.*]] to <16 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x float> [[B:%.*]] to <16 x i32> @@ -1366,11 +1366,11 @@ v16float test_shuffle(v16float a, unsigned int mode) { // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <16 x float> // CHECK-NEXT: ret <16 x float> [[TMP3]] // -v16float test_shiftx(v16float a, v16float b, int step, int shift) { +v16float test_shiftx(v16float a, v16float b, int step, unsigned int shift) { return shiftx(a,b,step,shift); } -// CHECK-LABEL: @_Z16test_shift_bytesDv16_fS_i( +// CHECK-LABEL: @_Z16test_shift_bytesDv16_fS_j( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x float> [[A:%.*]] to <16 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x float> [[B:%.*]] to <16 x i32> @@ -1378,20 +1378,20 @@ v16float test_shiftx(v16float a, v16float b, int step, int shift) { // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <16 x float> // CHECK-NEXT: ret <16 x float> [[TMP3]] // -v16float test_shift_bytes(v16float a, v16float b, int shift) { +v16float test_shift_bytes(v16float a, v16float b, unsigned int shift) { return shift_bytes(a, b, shift); } -// CHECK-LABEL: @_Z10test_shiftDv16_fS_i( +// CHECK-LABEL: @_Z10test_shiftDv16_fS_j( // CHECK-NEXT: entry: -// CHECK-NEXT: [[MUL_I:%.*]] = shl nsw i32 [[SHIFT_BY:%.*]], 2 +// CHECK-NEXT: [[MUL_I:%.*]] = shl i32 [[SHIFT_BY:%.*]], 2 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x float> [[A:%.*]] to <16 x i32> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x float> [[B:%.*]] to <16 x i32> // CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.aie2.vshift.I512.I512(<16 x i32> [[TMP0]], <16 x i32> [[TMP1]], i32 0, i32 [[MUL_I]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP2]] to <16 x float> // CHECK-NEXT: ret <16 x float> [[TMP3]] // -v16float test_shift(v16float a, v16float b, int shift_by) { +v16float test_shift(v16float a, v16float b, unsigned int shift_by) { return shift(a, b, shift_by); }