Skip to content

Commit

Permalink
Add fshr functionality and unit tests (#1217)
Browse files Browse the repository at this point in the history
* Add fshr functionality and unit tests

* Add space at EOF

* Fix some format issues
  • Loading branch information
Rekt3421 authored Sep 5, 2023
1 parent a0c7603 commit eda331e
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 0 deletions.
42 changes: 42 additions & 0 deletions lib/ReplaceLLVMIntrinsicsPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ bool clspv::ReplaceLLVMIntrinsicsPass::runOnFunction(Function &F) {
switch (F.getIntrinsicID()) {
case Intrinsic::bswap:
return replaceBswap(F);
case Intrinsic::fshr:
return replaceFshr(F);
case Intrinsic::fshl:
return replaceFshl(F);
case Intrinsic::copysign:
Expand Down Expand Up @@ -182,6 +184,46 @@ bool clspv::ReplaceLLVMIntrinsicsPass::replaceBswap(Function &F) {
});
}

// Replaces every call to the llvm.fshr (funnel shift right) intrinsic |F|
// with explicit shift and or instructions.
//
// fshr(hi, lo, shift) concatenates hi:lo, shifts that double-width value right
// by (shift modulo the element width) and keeps the low half, i.e.
//   n == 0 : lo
//   n != 0 : (lo >> n) | (hi << (width - n))
// where n = shift % width.
//
// Returns whatever replaceCallsWithValue reports for |F|.
bool clspv::ReplaceLLVMIntrinsicsPass::replaceFshr(Function &F) {
  return replaceCallsWithValue(F, [](CallInst *call) -> Value * {
    auto arg_hi = call->getArgOperand(0);
    auto arg_lo = call->getArgOperand(1);
    auto arg_shift = call->getArgOperand(2);

    // Only 8, 16, 32 and 64-bit elements are handled. For anything else a
    // null value is handed back to replaceCallsWithValue (presumably meaning
    // "no replacement" -- confirm against that helper).
    auto type = arg_hi->getType();
    const unsigned bit_width = type->getScalarSizeInBits();
    if (bit_width != 8 && bit_width != 16 && bit_width != 32 &&
        bit_width != 64) {
      return nullptr;
    }

    IRBuilder<> builder(call);

    // The shift amount is treated modulo the element size. The accepted
    // widths are all powers of two, so masking with (width - 1) is the modulo.
    auto mod_mask = ConstantInt::get(type, bit_width - 1);

    // The low (width - n) bits of the result are the top bits of the second
    // argument, moved down by n.
    auto lsb_shift = builder.CreateAnd(arg_shift, mod_mask);

    // The high n bits of the result are the low n bits of the first argument,
    // moved up by (width - n). That amount equals the full width when n == 0.
    //
    // "The resulting value is undefined if Shift is greater than or equal to
    // the bit width of the components of Base."
    // https://www.khronos.org/registry/SPIR-V/specs/unified1/SPIRV.html#Bit
    //
    // A shift by the full width is likewise poison in LLVM IR, and masking the
    // amount back into range would turn it into a shift by 0 and OR the whole
    // first argument into the result. Instead the move is split into a shift
    // by 1 followed by a shift by (width - 1 - n): both amounts are always
    // below the width, and for n == 0 every bit of the first argument is
    // shifted out, leaving just the second argument.
    auto one = ConstantInt::get(type, 1);
    auto msb_shift = builder.CreateSub(mod_mask, lsb_shift);

    auto hi_bits = builder.CreateShl(builder.CreateShl(arg_hi, one), msb_shift);
    auto lo_bits = builder.CreateLShr(arg_lo, lsb_shift);

    return builder.CreateOr(lo_bits, hi_bits);
  });
}

bool clspv::ReplaceLLVMIntrinsicsPass::replaceFshl(Function &F) {
return replaceCallsWithValue(F, [](CallInst *call) {
auto arg_hi = call->getArgOperand(0);
Expand Down
1 change: 1 addition & 0 deletions lib/ReplaceLLVMIntrinsicsPass.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ struct ReplaceLLVMIntrinsicsPass
bool replaceMemcpy(llvm::Module &M);
bool removeIntrinsicDeclaration(llvm::Function &F);
bool replaceBswap(llvm::Function &F);
// Rewrites calls to the llvm.fshr (funnel shift right) intrinsic |F| as
// explicit shift and or instructions.
bool replaceFshr(llvm::Function &F);
bool replaceFshl(llvm::Function &F);
bool replaceCountZeroes(llvm::Function &F, bool leading);
bool replaceCopysign(llvm::Function &F);
Expand Down
83 changes: 83 additions & 0 deletions test/LLVMIntrinsics/fshr.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
; RUN: clspv-opt %s -o %t.ll --passes=replace-llvm-intrinsics
; RUN: FileCheck %s < %t.ll

target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "spir-unknown-unknown"

; 8-bit case: calls llvm.fshr.i8 on %a, %b and a run-time shift amount %c,
; then stores the result through %out.
define void @fshr_i8(ptr addrspace(1) %out, i8 %a, i8 %b, i8 %c) {
entry:
%result = call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
store i8 %result, ptr addrspace(1) %out
ret void
}

declare i8 @llvm.fshr.i8(i8, i8, i8)

; CHECK-NOT: llvm.fshr
; CHECK: [[and:%[0-9a-zA-Z_.]+]] = and i8 %c, 7
; CHECK: [[sub:%[0-9a-zA-Z_.]+]] = sub i8 8, [[and]]
; CHECK: [[and2:%[0-9a-zA-Z_.]+]] = and i8 [[sub]], 7
; CHECK: [[shl:%[0-9a-zA-Z_.]+]] = shl i8 %a, [[and2]]
; CHECK: [[lshr:%[0-9a-zA-Z_.]+]] = lshr i8 %b, [[and]]
; CHECK: [[or:%[0-9a-zA-Z_.]+]] = or i8 [[lshr]], [[shl]]
; CHECK: store i8 [[or]], ptr addrspace(1) %out



; 16-bit case: calls llvm.fshr.i16 on %a, %b and a run-time shift amount %c,
; then stores the result through %out.
define void @fshr_i16(ptr addrspace(1) %out, i16 %a, i16 %b, i16 %c) {
entry:
%result = call i16 @llvm.fshr.i16(i16 %a, i16 %b, i16 %c)
store i16 %result, ptr addrspace(1) %out
ret void
}

declare i16 @llvm.fshr.i16(i16, i16, i16)

; CHECK-NOT: llvm.fshr
; CHECK: [[and:%[0-9a-zA-Z_.]+]] = and i16 %c, 15
; CHECK: [[sub:%[0-9a-zA-Z_.]+]] = sub i16 16, [[and]]
; CHECK: [[and2:%[0-9a-zA-Z_.]+]] = and i16 [[sub]], 15
; CHECK: [[shl:%[0-9a-zA-Z_.]+]] = shl i16 %a, [[and2]]
; CHECK: [[lshr:%[0-9a-zA-Z_.]+]] = lshr i16 %b, [[and]]
; CHECK: [[or:%[0-9a-zA-Z_.]+]] = or i16 [[lshr]], [[shl]]
; CHECK: store i16 [[or]], ptr addrspace(1) %out



; 32-bit case: calls llvm.fshr.i32 on %a, %b and a run-time shift amount %c,
; then stores the result through %out.
define void @fshr_i32(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) {
entry:
%result = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
store i32 %result, ptr addrspace(1) %out
ret void
}

declare i32 @llvm.fshr.i32(i32, i32, i32)

; CHECK-NOT: llvm.fshr
; CHECK: [[and:%[0-9a-zA-Z_.]+]] = and i32 %c, 31
; CHECK: [[sub:%[0-9a-zA-Z_.]+]] = sub i32 32, [[and]]
; CHECK: [[and2:%[0-9a-zA-Z_.]+]] = and i32 [[sub]], 31
; CHECK: [[shl:%[0-9a-zA-Z_.]+]] = shl i32 %a, [[and2]]
; CHECK: [[lshr:%[0-9a-zA-Z_.]+]] = lshr i32 %b, [[and]]
; CHECK: [[or:%[0-9a-zA-Z_.]+]] = or i32 [[lshr]], [[shl]]
; CHECK: store i32 [[or]], ptr addrspace(1) %out



; 64-bit case: calls llvm.fshr.i64 on %a, %b and a run-time shift amount %c,
; then stores the result through %out.
define void @fshr_i64(ptr addrspace(1) %out, i64 %a, i64 %b, i64 %c) {
entry:
%result = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
store i64 %result, ptr addrspace(1) %out
ret void
}

declare i64 @llvm.fshr.i64(i64, i64, i64)

; CHECK-NOT: llvm.fshr
; CHECK: [[and:%[0-9a-zA-Z_.]+]] = and i64 %c, 63
; CHECK: [[sub:%[0-9a-zA-Z_.]+]] = sub i64 64, [[and]]
; CHECK: [[and2:%[0-9a-zA-Z_.]+]] = and i64 [[sub]], 63
; CHECK: [[shl:%[0-9a-zA-Z_.]+]] = shl i64 %a, [[and2]]
; CHECK: [[lshr:%[0-9a-zA-Z_.]+]] = lshr i64 %b, [[and]]
; CHECK: [[or:%[0-9a-zA-Z_.]+]] = or i64 [[lshr]], [[shl]]
; CHECK: store i64 [[or]], ptr addrspace(1) %out

0 comments on commit eda331e

Please sign in to comment.